Compare commits
80 Commits
v0.3.13-be
...
v0.3.13
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2b47d4a811 | ||
|
|
e977c512a7 | ||
|
|
0828c0a4db | ||
|
|
ecee7bfa56 | ||
|
|
7a0485c0dc | ||
|
|
d4ff6143f4 | ||
|
|
8aa8b30a43 | ||
|
|
019bdcae6f | ||
|
|
edfea2dbf2 | ||
|
|
eeec8e0d17 | ||
|
|
e79ab3c2e3 | ||
|
|
2719cf4930 | ||
|
|
d6d89aac43 | ||
|
|
973c738059 | ||
|
|
2345192728 | ||
|
|
dc1c71a01d | ||
|
|
b93b646d41 | ||
|
|
75354703ce | ||
|
|
7123f004e9 | ||
|
|
3dac44d326 | ||
|
|
76de7e5db9 | ||
|
|
95e0b3408d | ||
|
|
358440779f | ||
|
|
6518eebce7 | ||
|
|
e19df82147 | ||
|
|
3ec3f90d9f | ||
|
|
85452eeb10 | ||
|
|
e4520007ea | ||
|
|
63b55a9560 | ||
|
|
4dbc1ca740 | ||
|
|
06c585885e | ||
|
|
5bede68a82 | ||
|
|
1a83456d77 | ||
|
|
58d8ce96d9 | ||
|
|
5aec281e87 | ||
|
|
bcbf561887 | ||
|
|
813f300a15 | ||
|
|
a356585cf8 | ||
|
|
689881f981 | ||
|
|
d473369f37 | ||
|
|
5fcd605b7d | ||
|
|
2ea9dfee60 | ||
|
|
07ad6820cc | ||
|
|
1c8523c6dd | ||
|
|
84f682aaa7 | ||
|
|
efc03a90b5 | ||
|
|
8f3562a0c8 | ||
|
|
b15f7f68ae | ||
|
|
b646b3095b | ||
|
|
261ea5db2e | ||
|
|
ff93377459 | ||
|
|
f90befdfbe | ||
|
|
d490d6d457 | ||
|
|
3fdaa448af | ||
|
|
7f88643361 | ||
|
|
55dc817843 | ||
|
|
6e2969d8a6 | ||
|
|
9a146ba2f0 | ||
|
|
5f224806e2 | ||
|
|
77f048abda | ||
|
|
e45efc08a5 | ||
|
|
d1dd91d5c4 | ||
|
|
106ef0c417 | ||
|
|
8989b7130a | ||
|
|
0663708afb | ||
|
|
d58aba71b8 | ||
|
|
9d364f99e2 | ||
|
|
edb62f0f38 | ||
|
|
e771eaf0a4 | ||
|
|
199b5c9e67 | ||
|
|
5d121935d2 | ||
|
|
ee0677b6e5 | ||
|
|
11325e99c4 | ||
|
|
fc5a6b5af3 | ||
|
|
65d17ceea3 | ||
|
|
8a57ada148 | ||
|
|
82e07b4de3 | ||
|
|
6ca5645656 | ||
|
|
a75ff21305 | ||
|
|
a7b00494e5 |
5
.github/ISSUE_TEMPLATE/config.yml
vendored
5
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,5 +1,8 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: Ask a question
|
||||
url: https://github.com/florisboard/florisboard/discussions/new?category=q-a
|
||||
about: Ask here if you have a question about FlorisBoard or need assistance
|
||||
- name: General feedback
|
||||
url: https://github.com/florisboard/florisboard/blob/master/CONTRIBUTING.md
|
||||
url: https://github.com/florisboard/florisboard/discussions/new?category=feedback
|
||||
about: Give general feedback about this project
|
||||
|
||||
15
.github/ISSUE_TEMPLATE/feature_request.md
vendored
15
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -8,12 +8,11 @@ assignees: ''
|
||||
---
|
||||
|
||||
<!--
|
||||
- Describe your idea in a short but concise way.
|
||||
- If you have multiple ideas which are not directly connected to each
|
||||
other, file an issue per idea. This makes it easy to implement one
|
||||
feature proposal at a time.
|
||||
- If you have any examples, e.g. screenshots or other keyboards which
|
||||
have the proposed feature implemented, link them here.
|
||||
- Please search existing proposals to avoid creating duplicates.
|
||||
- Thank you for your help in making FlorisBoard better!
|
||||
Thank you for your help in making FlorisBoard better!
|
||||
|
||||
Guide to a good feature-request:
|
||||
• Please search existing proposals to avoid creating duplicates.
|
||||
• If you have multiple ideas which are not directly connected to each other, file a new issue for each idea. This makes it easier to implement your proposals.
|
||||
• Describe your idea in a short but concise way.
|
||||
• If you have any examples, e.g. screenshots or other keyboards which have the proposed feature implemented, feel free to post them after your description.
|
||||
-->
|
||||
|
||||
16
.github/ISSUE_TEMPLATE/question.md
vendored
16
.github/ISSUE_TEMPLATE/question.md
vendored
@@ -1,16 +0,0 @@
|
||||
---
|
||||
name: Question
|
||||
about: Ask here if you have a question about FlorisBoard
|
||||
title: ''
|
||||
labels: question
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
<!--
|
||||
- If you need assistance in using FlorisBoard, ask it here!
|
||||
- If you want to suggest an idea for this project, please use the
|
||||
Feature request template instead.
|
||||
- Please search existing questions to avoid creating duplicates.
|
||||
- Thank you for your help in making FlorisBoard better!
|
||||
-->
|
||||
8
.github/workflows/android.yml
vendored
8
.github/workflows/android.yml
vendored
@@ -11,11 +11,13 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: set up JDK 1.8
|
||||
- uses: actions/checkout@v2
|
||||
- name: Checkout submodules
|
||||
run: git submodule update --init --recursive
|
||||
- name: set up JDK 11
|
||||
uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 1.8
|
||||
java-version: 11
|
||||
- name: Setup CMake and Ninja
|
||||
uses: lukka/get-cmake@v3.20.1
|
||||
- uses: actions/cache@v2
|
||||
|
||||
6
.gitmodules
vendored
6
.gitmodules
vendored
@@ -1,3 +1,3 @@
|
||||
[submodule "app/src/main/cpp/icu4c/android"]
|
||||
path = app/src/main/cpp/icu4c/android
|
||||
url = https://github.com/patrickgold/icu4c-android
|
||||
[submodule "app/src/main/icu4c"]
|
||||
path = app/src/main/icu4c
|
||||
url = https://github.com/florisboard/icu4c
|
||||
|
||||
@@ -7,8 +7,13 @@ provides some general guidelines for each type of contribution.
|
||||
|
||||
## Giving general feedback
|
||||
|
||||
Either use the review function within Google Play or email me at
|
||||
[florisboard@patrickgold.dev](mailto:florisboard@patrickgold.dev). I
|
||||
NEW! You can now [give general feedback](https://github.com/florisboard/florisboard/discussions/new?category=feedback)
|
||||
directly here on GitHub. This is the preferred way to give feedback, as
|
||||
it allows not only for me to read and respond to feedback, but for everyone
|
||||
in this community.
|
||||
|
||||
Optionally you can also use the review function within Google Play or email me
|
||||
at [florisboard@patrickgold.dev](mailto:florisboard@patrickgold.dev). I
|
||||
love to hear from you! Note, that the amount of feedback emails I get
|
||||
is overwhelmingly high - so if I don't answer or answer really late, I
|
||||
apologize - I guarantee though that I read through every email and that
|
||||
|
||||
74
README.md
74
README.md
@@ -64,7 +64,7 @@ alt="Preview image">
|
||||
## Implemented features
|
||||
This list contains all implemented and fully functional features
|
||||
FlorisBoard currently has to offer. For planned features and its
|
||||
milestones, please refer to the [Feature roadmap](#feature-roadmap).
|
||||
milestones, please refer to the [Feature roadmap](ROADMAP.md).
|
||||
|
||||
### Basics
|
||||
* [x] Implementation of the keyboard core (InputMethodService)
|
||||
@@ -72,14 +72,14 @@ milestones, please refer to the [Feature roadmap](#feature-roadmap).
|
||||
* [x] Caps + Caps Lock
|
||||
* [x] Key popups
|
||||
* [x] Extended key popups (e.g. a -> á, à, ä, ...)
|
||||
* [x] Key press sound/vibration
|
||||
* [x] Audio/haptic feedback for keyboard touch interaction
|
||||
* [x] Portrait orientation support
|
||||
* [x] Landscape orientation support (needs tweaks)
|
||||
|
||||
### Layouts
|
||||
* [x] Latin character layouts (QWERTY, QWERTZ, AZERTY, Swiss, Spanish, Norwegian, Swedish/Finnish, Icelandic, Danish,
|
||||
Hungarian, Croatian, Polish, Romanian, Colemak, Dvorak, Turkish-Q, Turkish-F, and more...)
|
||||
* [x] Non-latin character layouts (Arabic, Persian, Kurdish, Greek, Russian (JCUKEN), and more...)
|
||||
* [x] Non-latin character layouts (Arabic, Persian, Kurdish, Greek, Russian (JCUKEN), Japanese JIS, and more...)
|
||||
* [x] Adapt to situation in app (password, url, text, etc. )
|
||||
* [x] Special character layout(s)
|
||||
* [x] Numeric layout
|
||||
@@ -106,79 +106,17 @@ milestones, please refer to the [Feature roadmap](#feature-roadmap).
|
||||
* [x] Clipboard manager/history
|
||||
* [x] Integrated number row / symbols in character layouts
|
||||
* [x] Gesture support
|
||||
* [x] System-wide spell checker with spell results from FlorisBoard
|
||||
* [x] Full support for the system user dictionary (shared dictionary
|
||||
between all keyboards) and a private, internal user dictionary
|
||||
* [x] Full integration in IME service list of Android (xml/method)
|
||||
(integration is internal-only, because Android's default subtype
|
||||
implementation does not really allow for dynamic language/layout
|
||||
pairs, only compile-time defined ones)
|
||||
* [ ] Description and settings reference in System Language & Input
|
||||
* [x] Description and settings reference in System Language & Input
|
||||
* [ ] (dev only) Generate well-structured documentation of code
|
||||
* [ ] ...
|
||||
|
||||
## Feature roadmap
|
||||
This section describes the features which are planned to be implemented
|
||||
in FlorisBoard for the next major versions, modularized into sections.
|
||||
Please note that the milestone due dates are only raw estimates and will
|
||||
most likely be delayed back, even though I'm eager to stick to these as
|
||||
close as possible.
|
||||
|
||||
### [v0.4.0](https://github.com/florisboard/florisboard/milestone/4)
|
||||
- Module A: Smartbar rework (Implemented with [#91])
|
||||
- Ability to enable/disable Smartbar (features below thus only work if
|
||||
Smartbar is enabled)
|
||||
- Dynamic switching between clipboard tools and word suggestions
|
||||
- Ability to show both the number row and word suggestions at once
|
||||
- Better icons in quick actions
|
||||
- Complete rework of the Smartbar code base and the Smartbar layout
|
||||
definition in XML
|
||||
|
||||
- Module B: Composing suggestions (Phase 1: [#329])
|
||||
- Auto-suggestion of words based of precompiled dictionaries
|
||||
- Management of custom dictionary entries
|
||||
- Next-word suggestions by training language models. Data collected here is stored locally and never leaves
|
||||
the user's device.
|
||||
|
||||
- Module C: Extension packs (Implemented with [#162], reworked several times and still not stable)
|
||||
- Ability to load dictionaries (and later potentially other cool
|
||||
features too) only if needed to keep the core APK size small
|
||||
- Currently unclear how exactly this will work, but this is definitely
|
||||
a must-have feature
|
||||
- A full implementation may come only in v0.5.0
|
||||
|
||||
- Module D: Glide typing (Implemented with [#544])
|
||||
- Swiping over the characters will automatically convert this to a word
|
||||
- Possibly also add improvements based on the Flow keyboard
|
||||
|
||||
- Module E: Theme rework (Implemented with [#162])
|
||||
- Themes are now based on the Asset schema
|
||||
- Dynamic theme creation
|
||||
- Different theme modes (`Always day`, `Always night`, `Follow system`
|
||||
and `Follow time`)
|
||||
- Define a separate theme both for day and night theme
|
||||
- Adapt to app theme if possible
|
||||
- Theme import/export
|
||||
|
||||
### [v0.5.0](https://github.com/florisboard/florisboard/milestone/5)
|
||||
There's no exact roadmap yet, but these are the most important points:
|
||||
- Full layout customization in runtime
|
||||
- Extensive rework and customization of the media input (emojis, emoticons, kaomoji)
|
||||
- Better Smartbar customization
|
||||
- As an extension GIF support
|
||||
|
||||
### > v0.5.0
|
||||
This is completely open as of now and will gather planned features as time
|
||||
passes...
|
||||
|
||||
Backlog (currently not assigned to any milestone):
|
||||
|
||||
- Floating keyboard
|
||||
|
||||
[#91]: https://github.com/florisboard/florisboard/pull/91
|
||||
[#162]: https://github.com/florisboard/florisboard/pull/162
|
||||
[#329]: https://github.com/florisboard/florisboard/pull/329
|
||||
[#544]: https://github.com/florisboard/florisboard/pull/544
|
||||
|
||||
## Contributing
|
||||
Wanna contribute to FlorisBoard? That's great to hear! There are lots of
|
||||
different ways to help out. Bug reporting, making pull requests,
|
||||
@@ -207,8 +145,6 @@ to get more information on this topic.
|
||||
[The Unicode Consortium](https://github.com/unicode-org)
|
||||
* [Nuspell](https://github.com/nuspell/nuspell) by
|
||||
[Nuspell](https://github.com/nuspell)
|
||||
* [TokyoCabinet (only used glob.h and glob.c)](https://github.com/white-gecko/TokyoCabinet) by
|
||||
[Natanael Arndt](https://github.com/white-gecko)
|
||||
|
||||
## License
|
||||
```
|
||||
|
||||
106
ROADMAP.md
Normal file
106
ROADMAP.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# FlorisBoard's feature roadmap & milestones
|
||||
|
||||
This feature roadmap intends to provide transparency about what I want to add
|
||||
to FlorisBoard in the foreseeable future. Note that there are no ETAs for any
|
||||
version milestones down below, experience says these won't hold anyways.
|
||||
|
||||
I try my best to release regularly, though some features take a lot longer
|
||||
than others and thus releases can be spaced out a bit on the stable track.
|
||||
If you are interested in following the development more closely, make sure to
|
||||
follow along the beta track releases! These are generally more unstable but
|
||||
you get new stuff faster and can provide early feedback, which helps a lot!
|
||||
|
||||
## 0.3.x and 0.4.0
|
||||
Releases in this section still follow the old versioning scheme, meaning the
|
||||
patch number is a feature upgrade. As this naming convention is more confusing
|
||||
than useful, after the v0.4.0 release a new release/development cycle will be
|
||||
introduced.
|
||||
|
||||
### 0.3.13 (currently in development and soon done)
|
||||
- Spell checking (mainly completed and relatively well working, Smartbar integration still missing)
|
||||
- Performance improvements in keyboard rendering
|
||||
- Audio/haptic feedback rework
|
||||
- Lots and lots of bug fixing in all areas, really fix some annoying bugs
|
||||
- New layouts added by contributors
|
||||
|
||||
### 0.3.14
|
||||
- Re-write of the Preference core
|
||||
- Reduce redundancy in key/default value definitions
|
||||
- Avoid having to manually add redundant code for adding a new pref
|
||||
- Goes hand-in-hand with the Settings UI re-write
|
||||
- Re-write of the Settings UI with Jetpack Compose
|
||||
- Also re-structure UI into a more list-like panel
|
||||
- Adjust theme colors of Settings a bit to make it more modern
|
||||
- Preview the keyboard at any time from within the Settings
|
||||
- Settings language different than device language
|
||||
- Re-write the Setup UI in Jetpack Compose
|
||||
- Simplify screen based on previously discussed ideas and mock-ups
|
||||
- Improve backend setup logic
|
||||
- Implement base-UI for extensions and further continue development
|
||||
of existing Flex (FlorisBoard extension) format
|
||||
- Allows for a continuous experience of customizing FlorisBoard in different areas
|
||||
- Planned for the future (not in this version though) to use Flex:
|
||||
- Themes
|
||||
- Layouts (Characters, symbols, numeric, ...)
|
||||
- Composers for non-Latin script languages
|
||||
- Word suggestion dictionaries
|
||||
- Spell check dictionaries
|
||||
- User dictionaries
|
||||
- Other features that require only data and no logic
|
||||
- Maybe full backup of preferences? Not 100% confirmed though and may be pushed back
|
||||
|
||||
### 0.3.15
|
||||
- Re-adding word suggestions (at least for Latin-based languages at first)
|
||||
- Importing the dictionaries as well as management relies on the Flex extension core and UI in Kotlin
|
||||
- Actually parsing and generating suggestions happens in C++ to avoid another OOM catastrophe like in 0.3.9/10
|
||||
- The actual format of the dictionary and word list source is not decided yet
|
||||
- Improvement of the candidate view in Smartbar (for word suggestions)
|
||||
- Theme rework part I:
|
||||
- Custom key corner radius
|
||||
- Custom key border color (not shadow!!)
|
||||
- Re-work theme internals so they use Flex format
|
||||
- Community repository on GitHub for theme sharing across users (when Theme Flex format is ready)
|
||||
|
||||
### 0.4.0
|
||||
- Prepare FlorisBoard repository and app store presence for public beta release
|
||||
on Google Play
|
||||
- Rework branding images and texts of FlorisBoard for the app stores
|
||||
- Focus on polishing the app and fixing bugs/crashes
|
||||
|
||||
With this release the versioning scheme changes: the second number now indicates new features,
|
||||
changes in the third "patch" number now indicate bug fixes for the stable track. The development
|
||||
cycle for each 0.x release will have -betaXX and -rcXX (release candidate) releases on the beta
|
||||
track for interested people to follow along the development.
|
||||
|
||||
## 0.5.0
|
||||
- Complete rework of the Emoji panel
|
||||
- Recently used / Emoji history
|
||||
- Emoji search
|
||||
- Emoji suggestions when using :emoji_name: syntax
|
||||
- Kaomoji panel implementation (the third tab which currently has "not yet implemented")
|
||||
- Full Smartbar customization
|
||||
- Includes internal rework of how Smartbar is built and assembled
|
||||
- Allow for more than one Smartbar / Stackable and Collapsible Smartbars
|
||||
- Customizable quick actions, clipboard row
|
||||
|
||||
## 0.6.0
|
||||
- Full on-board layout editor which allows users to create their own layouts
|
||||
without writing a JSON file
|
||||
- Import/Export of custom layout files packed in Flex extensions
|
||||
|
||||
## Backlog / Features that MAY be added
|
||||
- Theme rework part II
|
||||
- Adaptive themes v2
|
||||
- Voice-to-text with Mozilla's open-source voice service
|
||||
- Text translation
|
||||
- Glide typing better word detection
|
||||
- Proximity-based key typo detection
|
||||
- Floating keyboard
|
||||
- Tablet mode / Optimizations for landscape input
|
||||
- Stickers/GIFs
|
||||
- FlorisBoard landing web page for presentation
|
||||
- Implementing additional layouts
|
||||
- Support for Tasker/Automate/MacroDroid plugins
|
||||
- Support for WearOS/Smartwatches
|
||||
- Handwriting
|
||||
- ...
|
||||
@@ -1,14 +1,13 @@
|
||||
|
||||
plugins {
|
||||
id("com.android.application") version "4.2.1"
|
||||
id("com.android.application") version "7.0.1"
|
||||
kotlin("android") version "1.5.20"
|
||||
kotlin("kapt") version "1.5.20"
|
||||
kotlin("plugin.serialization") version "1.5.20"
|
||||
}
|
||||
|
||||
android {
|
||||
compileSdkVersion(30)
|
||||
buildToolsVersion("30.0.3")
|
||||
compileSdk = 30
|
||||
buildToolsVersion = "30.0.3"
|
||||
ndkVersion = "22.1.7171670"
|
||||
|
||||
compileOptions {
|
||||
@@ -23,10 +22,10 @@ android {
|
||||
|
||||
defaultConfig {
|
||||
applicationId = "dev.patrickgold.florisboard"
|
||||
minSdkVersion(23)
|
||||
targetSdkVersion(30)
|
||||
versionCode(49)
|
||||
versionName("0.3.13")
|
||||
minSdk = 23
|
||||
targetSdk = 30
|
||||
versionCode = 56
|
||||
versionName = "0.3.13"
|
||||
|
||||
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
|
||||
|
||||
@@ -55,8 +54,11 @@ android {
|
||||
|
||||
sourceSets {
|
||||
maybeCreate("main").apply {
|
||||
jni {
|
||||
srcDirs("src/main/jniLibs")
|
||||
assets {
|
||||
srcDirs("src/main/assets", "src/main/icu4c/prebuilt/assets")
|
||||
}
|
||||
jniLibs {
|
||||
srcDirs("src/main/icu4c/prebuilt/jniLibs")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -88,7 +90,7 @@ android {
|
||||
create("beta") // Needed because by default the "beta" BuildType does not exist
|
||||
named("beta").configure {
|
||||
applicationIdSuffix = ".beta"
|
||||
versionNameSuffix = "-beta06"
|
||||
versionNameSuffix = "-beta12"
|
||||
proguardFiles.add(getDefaultProguardFile("proguard-android-optimize.txt"))
|
||||
|
||||
resValue("mipmap", "floris_app_icon", "@mipmap/ic_app_icon_beta")
|
||||
@@ -111,12 +113,11 @@ android {
|
||||
}
|
||||
}
|
||||
|
||||
lintOptions {
|
||||
lint {
|
||||
isAbortOnError = false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
dependencies {
|
||||
implementation("androidx.activity", "activity-ktx", "1.2.1")
|
||||
implementation("androidx.appcompat", "appcompat", "1.2.0")
|
||||
@@ -126,7 +127,7 @@ dependencies {
|
||||
implementation("androidx.preference", "preference-ktx", "1.1.1")
|
||||
implementation("androidx.constraintlayout", "constraintlayout", "2.0.4")
|
||||
implementation("androidx.lifecycle", "lifecycle-service", "2.2.0")
|
||||
implementation("com.google.android", "flexbox", "2.0.1")
|
||||
implementation("com.google.android.flexbox", "flexbox", "3.0.0")
|
||||
implementation("com.google.android.material", "material", "1.3.0")
|
||||
implementation("org.jetbrains.kotlinx", "kotlinx-coroutines-android", "1.4.2")
|
||||
implementation("org.jetbrains.kotlinx", "kotlinx-serialization-json", "1.1.0")
|
||||
|
||||
@@ -34,7 +34,8 @@
|
||||
android:name="dev.patrickgold.florisboard.FlorisImeService"
|
||||
android:label="@string/floris_app_name"
|
||||
android:permission="android.permission.BIND_INPUT_METHOD"
|
||||
android:directBootAware="true">
|
||||
android:directBootAware="true"
|
||||
android:exported="true">
|
||||
<intent-filter>
|
||||
<action android:name="android.view.InputMethod"/>
|
||||
</intent-filter>
|
||||
@@ -45,7 +46,8 @@
|
||||
<service
|
||||
android:name="dev.patrickgold.florisboard.FlorisSpellCheckerService"
|
||||
android:label="@string/floris_app_name"
|
||||
android:permission="android.permission.BIND_TEXT_SERVICE">
|
||||
android:permission="android.permission.BIND_TEXT_SERVICE"
|
||||
android:exported="true">
|
||||
<intent-filter>
|
||||
<action android:name="android.service.textservice.SpellCheckerService"/>
|
||||
</intent-filter>
|
||||
@@ -68,7 +70,8 @@
|
||||
android:label="@string/floris_app_name"
|
||||
android:launchMode="singleTask"
|
||||
android:roundIcon="@mipmap/floris_app_icon_round"
|
||||
android:targetActivity="dev.patrickgold.florisboard.setup.SetupActivity">
|
||||
android:targetActivity="dev.patrickgold.florisboard.setup.SetupActivity"
|
||||
android:exported="true">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN"/>
|
||||
<category android:name="android.intent.category.LAUNCHER"/>
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
"package": "dev.patrickgold.florisboard",
|
||||
"composers": [
|
||||
{ "$": "appender" },
|
||||
{ "$": "hangul-unicode" }
|
||||
{ "$": "hangul-unicode" },
|
||||
{ "$": "kana-unicode" }
|
||||
],
|
||||
"currencySets": [
|
||||
{
|
||||
@@ -682,6 +683,27 @@
|
||||
"preferred": {
|
||||
"characters": "korean"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3000,
|
||||
"languageTag": "lt-LT",
|
||||
"composer": "appender",
|
||||
"currencySet": "euro",
|
||||
"preferred": {
|
||||
"characters": "qwerty"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3100,
|
||||
"languageTag": "ja-JP-jis",
|
||||
"composer": "kana-unicode",
|
||||
"currencySet": "yen",
|
||||
"preferred": {
|
||||
"characters": "jis",
|
||||
"symbols": "cjk",
|
||||
"symbols2": "cjk",
|
||||
"numericRow": "cjk"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
BIN
app/src/main/assets/ime/dict/en.flict
Normal file
BIN
app/src/main/assets/ime/dict/en.flict
Normal file
Binary file not shown.
@@ -4,59 +4,21 @@
|
||||
"authors": [ "HeiWiper" ],
|
||||
"mapping": {
|
||||
"all": {
|
||||
"ض": {
|
||||
"relevant": [
|
||||
{ "code": 1633, "label": "١" }
|
||||
]
|
||||
},
|
||||
"ص": {
|
||||
"relevant": [
|
||||
{ "code": 1634, "label": "٢" }
|
||||
]
|
||||
},
|
||||
"ث": {
|
||||
"relevant": [
|
||||
{ "code": 1635, "label": "٣" }
|
||||
]
|
||||
},
|
||||
"ق": {
|
||||
"relevant": [
|
||||
{ "code": 1704, "label": "ڨ" },
|
||||
{ "code": 1636, "label": "٤" }
|
||||
{ "code": 1704, "label": "ڨ" }
|
||||
]
|
||||
},
|
||||
"ف": {
|
||||
"relevant": [
|
||||
{ "code": 1701, "label": "ڥ" },
|
||||
{ "code": 1700, "label": "ڤ" },
|
||||
{ "code": 1698, "label": "ڢ" },
|
||||
{ "code": 1637, "label": "٥" }
|
||||
]
|
||||
},
|
||||
"غ": {
|
||||
"relevant": [
|
||||
{ "code": 1638, "label": "٦" }
|
||||
]
|
||||
},
|
||||
"ع": {
|
||||
"relevant": [
|
||||
{ "code": 1639, "label": "٧" }
|
||||
{ "code": 1698, "label": "ڢ" }
|
||||
]
|
||||
},
|
||||
"ه": {
|
||||
"relevant": [
|
||||
{ "code": 1726, "label": "ھ" },
|
||||
{ "code": 1640, "label": "٨" }
|
||||
]
|
||||
},
|
||||
"خ": {
|
||||
"relevant": [
|
||||
{ "code": 1641, "label": "٩" }
|
||||
]
|
||||
},
|
||||
"ح": {
|
||||
"relevant": [
|
||||
{ "code": 1632, "label": "٠" }
|
||||
{ "code": 1726, "label": "ھ" }
|
||||
]
|
||||
},
|
||||
"ج": {
|
||||
@@ -70,8 +32,8 @@
|
||||
]
|
||||
},
|
||||
"ي": {
|
||||
"main": { "code": 1574, "label": "ئ" },
|
||||
"relevant": [
|
||||
{ "code": 1574, "label": "ئ" },
|
||||
{ "code": 1609, "label": "ى" }
|
||||
]
|
||||
},
|
||||
@@ -89,10 +51,10 @@
|
||||
]
|
||||
},
|
||||
"ا": {
|
||||
"main": { "code": 1571, "label": "أ" },
|
||||
"relevant": [
|
||||
{ "code": 1570, "label": "آ" },
|
||||
{ "code": 1569, "label": "ء" },
|
||||
{ "code": 1571, "label": "أ" },
|
||||
{ "code": 1573, "label": "إ" },
|
||||
{ "code": 1649, "label": "ٱ" }
|
||||
]
|
||||
@@ -104,9 +66,7 @@
|
||||
]
|
||||
},
|
||||
"ى": {
|
||||
"relevant": [
|
||||
{ "code": 1574, "label": "ئ" }
|
||||
]
|
||||
"main": { "code": 1574, "label": "ئ" }
|
||||
},
|
||||
"ز": {
|
||||
"relevant": [
|
||||
|
||||
@@ -0,0 +1,292 @@
|
||||
{
|
||||
"type": "characters/extended_popups",
|
||||
"name": "ja-JP-jis",
|
||||
"authors": [ "waelwindows" ],
|
||||
"mapping": {
|
||||
"all": {
|
||||
"あ": {
|
||||
"main": { "code": 12353, "label": "ぁ" }
|
||||
},
|
||||
"ア": {
|
||||
"main": { "code": 12449, "label": "ァ" }
|
||||
},
|
||||
"ア": {
|
||||
"main": { "code": 65383, "label": "ァ" }
|
||||
},
|
||||
"い": {
|
||||
"main": { "code": 12355, "label": "ぃ" },
|
||||
"relevant": [
|
||||
{ "code": 12432, "label": "ゐ" },
|
||||
{ "code": 110928, "label": "𛅐" }
|
||||
]
|
||||
},
|
||||
"イ": {
|
||||
"main": { "code": 12451, "label": "ィ" },
|
||||
"relevant": [
|
||||
{ "code": 12528, "label": "ヰ" },
|
||||
{ "code": 110948, "label": "𛅤" }
|
||||
]
|
||||
},
|
||||
"イ": {
|
||||
"main": { "code": 65384, "label": "ィ" }
|
||||
},
|
||||
"う": {
|
||||
"main": { "code": 12357, "label": "ぅ" }
|
||||
},
|
||||
"ウ": {
|
||||
"main": { "code": 12453, "label": "ゥ" }
|
||||
},
|
||||
"ウ": {
|
||||
"main": { "code": 65385, "label": "ゥ" }
|
||||
},
|
||||
"え": {
|
||||
"main": { "code": 12359, "label": "ぇ" },
|
||||
"relevant": [
|
||||
{ "code": 12433, "label": "ゑ" },
|
||||
{ "code": 110929, "label": "𛅑" }
|
||||
]
|
||||
},
|
||||
"エ": {
|
||||
"main": { "code": 12455, "label": "ェ" },
|
||||
"relevant": [
|
||||
{ "code": 12529, "label": "ヱ" },
|
||||
{ "code": 110949, "label": "𛅥" }
|
||||
]
|
||||
},
|
||||
"エ": {
|
||||
"main": { "code": 65386, "label": "ェ" }
|
||||
},
|
||||
"お": {
|
||||
"main": { "code": 12361, "label": "ぉ" }
|
||||
},
|
||||
"オ": {
|
||||
"main": { "code": 12457, "label": "ォ" }
|
||||
},
|
||||
"オ": {
|
||||
"main": { "code": 65387, "label": "ォ" }
|
||||
},
|
||||
"や": {
|
||||
"main": { "code": 12419, "label": "ゃ" }
|
||||
},
|
||||
"ヤ": {
|
||||
"main": { "code": 12515, "label": "ャ" }
|
||||
},
|
||||
"ヤ": {
|
||||
"main": { "code": 65388, "label": "ャ" }
|
||||
},
|
||||
"ゆ": {
|
||||
"main": { "code": 12421, "label": "ゅ" }
|
||||
},
|
||||
"ユ": {
|
||||
"main": { "code": 12517, "label": "ュ" }
|
||||
},
|
||||
"ユ": {
|
||||
"main": { "code": 65389, "label": "ュ" }
|
||||
},
|
||||
"よ": {
|
||||
"main": { "code": 12423, "label": "ょ" }
|
||||
},
|
||||
"ヨ": {
|
||||
"main": { "code": 12519, "label": "ョ" }
|
||||
},
|
||||
"ヨ": {
|
||||
"main": { "code": 65390, "label": "ョ" }
|
||||
},
|
||||
"わ": {
|
||||
"main": { "code": 12434, "label": "を" },
|
||||
"relevant": [
|
||||
{ "code": 12430, "label": "ゎ" },
|
||||
{ "code": 110930, "label": "𛅒" }
|
||||
]
|
||||
},
|
||||
"ワ": {
|
||||
"main": { "code": 12530, "label": "ヲ" },
|
||||
"relevant": [
|
||||
{ "code": 12526, "label": "ヮ" },
|
||||
{ "code": 110950, "label": "𛅦" }
|
||||
]
|
||||
},
|
||||
"ワ": {
|
||||
"main": { "code": 65382, "label": "ヲ" }
|
||||
},
|
||||
"つ": {
|
||||
"main": { "code": 12387, "label": "っ" }
|
||||
},
|
||||
"ツ": {
|
||||
"main": { "code": 12483, "label": "ッ" }
|
||||
},
|
||||
"ツ": {
|
||||
"main": { "code": 65391, "label": "ッ" }
|
||||
},
|
||||
"ト": {
|
||||
"relevant": [
|
||||
{ "code": 12787, "label": "ㇳ" }
|
||||
]
|
||||
},
|
||||
"シ": {
|
||||
"relevant": [
|
||||
{ "code": 12785, "label": "ㇱ" }
|
||||
]
|
||||
},
|
||||
"ス": {
|
||||
"main": { "code": 12786, "label": "ㇲ" }
|
||||
},
|
||||
"か": {
|
||||
"main": { "code": 12437, "label": "ゕ" }
|
||||
},
|
||||
"カ": {
|
||||
"main": { "code": 12533, "label": "ヵ" }
|
||||
},
|
||||
"ク": {
|
||||
"main": { "code": 12784, "label": "ㇰ" }
|
||||
},
|
||||
"け": {
|
||||
"main": { "code": 12438, "label": "ゖ" }
|
||||
},
|
||||
"ケ": {
|
||||
"main": { "code": 12534, "label": "ヶ" }
|
||||
},
|
||||
"ヌ": {
|
||||
"relevant": [
|
||||
{ "code": 12788, "label": "ㇴ" }
|
||||
]
|
||||
},
|
||||
"ハ": {
|
||||
"relevant": [
|
||||
{ "code": 12789, "label": "ㇵ" }
|
||||
]
|
||||
},
|
||||
"ヒ": {
|
||||
"relevant": [
|
||||
{ "code": 12790, "label": "ㇶ" }
|
||||
]
|
||||
},
|
||||
"フ": {
|
||||
"relevant": [
|
||||
{ "code": 12791, "label": "ㇷ" }
|
||||
]
|
||||
},
|
||||
"ヘ": {
|
||||
"relevant": [
|
||||
{ "code": 12792, "label": "ㇸ" }
|
||||
]
|
||||
},
|
||||
"ホ": {
|
||||
"relevant": [
|
||||
{ "code": 12793, "label": "ㇹ" }
|
||||
]
|
||||
},
|
||||
"ム": {
|
||||
"relevant": [
|
||||
{ "code": 12794, "label": "ㇺ" }
|
||||
]
|
||||
},
|
||||
"ラ": {
|
||||
"relevant": [
|
||||
{ "code": 12795, "label": "ㇻ" }
|
||||
]
|
||||
},
|
||||
"リ": {
|
||||
"relevant": [
|
||||
{ "code": 12796, "label": "ㇼ" }
|
||||
]
|
||||
},
|
||||
"ル": {
|
||||
"relevant": [
|
||||
{ "code": 12797, "label": "ㇽ" }
|
||||
]
|
||||
},
|
||||
"レ": {
|
||||
"relevant": [
|
||||
{ "code": 12798, "label": "ㇾ" }
|
||||
]
|
||||
},
|
||||
"ロ": {
|
||||
"relevant": [
|
||||
{ "code": 12799, "label": "ㇿ" }
|
||||
]
|
||||
},
|
||||
"゛": {
|
||||
"main": { "code": 12443, "label": "゛" }
|
||||
},
|
||||
"゜": {
|
||||
"main": { "code": 12444, "label": "゜" }
|
||||
},
|
||||
"~kana": {
|
||||
"relevant": [
|
||||
{ "code": -9711, "label": "あ" },
|
||||
{ "code": -9712, "label": "ア" },
|
||||
{ "code": -9713, "label": "ア" }
|
||||
]
|
||||
},
|
||||
"~right": {
|
||||
"main": { "$": "char_width_selector",
|
||||
"full": { "code": 12539, "label": "・" },
|
||||
"half": { "code": 9834, "label": "♪" }
|
||||
},
|
||||
"relevant": [
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65286, "label": "&" },
|
||||
"half": { "code": 38, "label": "&" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65285, "label": "%" },
|
||||
"half": { "code": 37, "label": "%" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65291, "label": "+" },
|
||||
"half": { "code": 43, "label": "+" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65340, "label": "＼" },
|
||||
"half": { "code": 92, "label": "\\" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65293, "label": "-" },
|
||||
"half": { "code": 45, "label": "-" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65306, "label": ":" },
|
||||
"half": { "code": 58, "label": ":" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65287, "label": "'" },
|
||||
"half": { "code": 39, "label": "'" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65312, "label": "@" },
|
||||
"half": { "code": 64, "label": "@" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65307, "label": ";" },
|
||||
"half": { "code": 59, "label": ";" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65295, "label": "/" },
|
||||
"half": { "code": 47, "label": "/" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65288, "label": "(" },
|
||||
"half": { "code": 40, "label": "(" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65289, "label": ")" },
|
||||
"half": { "code": 41, "label": ")" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65283, "label": "#" },
|
||||
"half": { "code": 35, "label": "#" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65281, "label": "!" },
|
||||
"half": { "code": 33, "label": "!" }
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65311, "label": "?" },
|
||||
"half": { "code": 63, "label": "?" }
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
167
app/src/main/assets/ime/text/characters/extended_popups/lt.json
Normal file
167
app/src/main/assets/ime/text/characters/extended_popups/lt.json
Normal file
@@ -0,0 +1,167 @@
|
||||
{
|
||||
"type": "characters/extended_popups",
|
||||
"name": "lv",
|
||||
"authors": [ "patrickgold" ],
|
||||
"mapping": {
|
||||
"all": {
|
||||
"a": {
|
||||
"main": { "$": "auto_text_key", "code": 261, "label": "ą" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 226, "label": "â" },
|
||||
{ "$": "auto_text_key", "code": 227, "label": "ã" },
|
||||
{ "$": "auto_text_key", "code": 229, "label": "å" },
|
||||
{ "$": "auto_text_key", "code": 230, "label": "æ" },
|
||||
{ "$": "auto_text_key", "code": 228, "label": "ä" },
|
||||
{ "$": "auto_text_key", "code": 257, "label": "ā" },
|
||||
{ "$": "auto_text_key", "code": 224, "label": "à" },
|
||||
{ "$": "auto_text_key", "code": 225, "label": "á" }
|
||||
]
|
||||
},
|
||||
"c": {
|
||||
"main": { "$": "auto_text_key", "code": 269, "label": "č" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 263, "label": "ć" },
|
||||
{ "$": "auto_text_key", "code": 231, "label": "ç" }
|
||||
]
|
||||
},
|
||||
"d": {
|
||||
"main": { "$": "auto_text_key", "code": 271, "label": "ď" }
|
||||
},
|
||||
"e": {
|
||||
"main": { "$": "auto_text_key", "code": 279, "label": "ė" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 235, "label": "ë" },
|
||||
{ "$": "auto_text_key", "code": 233, "label": "é" },
|
||||
{ "$": "auto_text_key", "code": 234, "label": "ê" },
|
||||
{ "$": "auto_text_key", "code": 283, "label": "ě" },
|
||||
{ "$": "auto_text_key", "code": 275, "label": "ē" },
|
||||
{ "$": "auto_text_key", "code": 281, "label": "ę" },
|
||||
{ "$": "auto_text_key", "code": 232, "label": "è" }
|
||||
]
|
||||
},
|
||||
"g": {
|
||||
"main": { "$": "auto_text_key", "code": 291, "label": "ģ" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 287, "label": "ğ" }
|
||||
]
|
||||
},
|
||||
"i": {
|
||||
"main": { "$": "auto_text_key", "code": 303, "label": "į" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 238, "label": "î" },
|
||||
{ "$": "auto_text_key", "code": 239, "label": "ï" },
|
||||
{ "$": "auto_text_key", "code": 236, "label": "ì" },
|
||||
{ "$": "auto_text_key", "code": 299, "label": "ī" },
|
||||
{ "$": "auto_text_key", "code": 237, "label": "í" }
|
||||
]
|
||||
},
|
||||
"k": {
|
||||
"main": { "$": "auto_text_key", "code": 311, "label": "ķ" }
|
||||
},
|
||||
"l": {
|
||||
"main": { "$": "auto_text_key", "code": 316, "label": "ļ" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 318, "label": "ľ" },
|
||||
{ "$": "auto_text_key", "code": 314, "label": "ĺ" },
|
||||
{ "$": "auto_text_key", "code": 322, "label": "ł" }
|
||||
]
|
||||
},
|
||||
"n": {
|
||||
"main": { "$": "auto_text_key", "code": 326, "label": "ņ" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 324, "label": "ń" },
|
||||
{ "$": "auto_text_key", "code": 241, "label": "ñ" }
|
||||
]
|
||||
},
|
||||
"o": {
|
||||
"main": { "$": "auto_text_key", "code": 246, "label": "ö" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 248, "label": "ø" },
|
||||
{ "$": "auto_text_key", "code": 337, "label": "ő" },
|
||||
{ "$": "auto_text_key", "code": 244, "label": "ô" },
|
||||
{ "$": "auto_text_key", "code": 339, "label": "œ" },
|
||||
{ "$": "auto_text_key", "code": 243, "label": "ó" },
|
||||
{ "$": "auto_text_key", "code": 242, "label": "ò" },
|
||||
{ "$": "auto_text_key", "code": 245, "label": "õ" }
|
||||
]
|
||||
},
|
||||
"r": {
|
||||
"main": { "$": "auto_text_key", "code": 343, "label": "ŗ" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 341, "label": "ŕ" },
|
||||
{ "$": "auto_text_key", "code": 345, "label": "ř" }
|
||||
]
|
||||
},
|
||||
"s": {
|
||||
"main": { "$": "auto_text_key", "code": 353, "label": "š" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 347, "label": "ś" },
|
||||
{ "$": "auto_text_key", "code": 223, "label": "ß" },
|
||||
{ "$": "auto_text_key", "code": 351, "label": "ş" }
|
||||
]
|
||||
},
|
||||
"t": {
|
||||
"main": { "$": "auto_text_key", "code": 355, "label": "ţ" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 357, "label": "ť" }
|
||||
]
|
||||
},
|
||||
"u": {
|
||||
"main": { "$": "auto_text_key", "code": 363, "label": "ū" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 367, "label": "ů" },
|
||||
{ "$": "auto_text_key", "code": 250, "label": "ú" },
|
||||
{ "$": "auto_text_key", "code": 251, "label": "û" },
|
||||
{ "$": "auto_text_key", "code": 369, "label": "ű" },
|
||||
{ "$": "auto_text_key", "code": 249, "label": "ù" },
|
||||
{ "$": "auto_text_key", "code": 252, "label": "ü" },
|
||||
{ "$": "auto_text_key", "code": 371, "label": "ų" }
|
||||
]
|
||||
},
|
||||
"y": {
|
||||
"main": { "$": "auto_text_key", "code": 253, "label": "ý" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 255, "label": "ÿ" }
|
||||
]
|
||||
},
|
||||
"z": {
|
||||
"main": { "$": "auto_text_key", "code": 382, "label": "ž" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 378, "label": "ź" },
|
||||
{ "$": "auto_text_key", "code": 380, "label": "ż" }
|
||||
]
|
||||
},
|
||||
"~right": {
|
||||
"main": { "code": 44, "label": "," },
|
||||
"relevant": [
|
||||
{ "code": 38, "label": "&" },
|
||||
{ "code": 37, "label": "%" },
|
||||
{ "code": 43, "label": "+" },
|
||||
{ "code": 34, "label": "\"" },
|
||||
{ "code": 45, "label": "-" },
|
||||
{ "code": 58, "label": ":" },
|
||||
{ "code": 39, "label": "'" },
|
||||
{ "code": 64, "label": "@" },
|
||||
{ "code": 59, "label": ";" },
|
||||
{ "code": 47, "label": "/" },
|
||||
{ "code": 40, "label": "(" },
|
||||
{ "code": 41, "label": ")" },
|
||||
{ "code": 35, "label": "#" },
|
||||
{ "code": 33, "label": "!" },
|
||||
{ "code": 63, "label": "?" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"uri": {
|
||||
"~right": {
|
||||
"main": { "code": -255, "label": ".lv" },
|
||||
"relevant": [
|
||||
{ "code": -255, "label": ".gov" },
|
||||
{ "code": -255, "label": ".edu" },
|
||||
{ "code": -255, "label": ".com" },
|
||||
{ "code": -255, "label": ".net" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,99 +1,35 @@
|
||||
{
|
||||
"type": "characters/extended_popups",
|
||||
"name": "tr",
|
||||
"authors": [ "kisekinopureya", "patrickgold" ],
|
||||
"authors": [ "kisekinopureya", "patrickgold", "dvrnynr" ],
|
||||
"mapping": {
|
||||
"all": {
|
||||
"a": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 226, "label": "â" },
|
||||
{ "$": "auto_text_key", "code": 228, "label": "ä" },
|
||||
{ "$": "auto_text_key", "code": 225, "label": "á" }
|
||||
]
|
||||
},
|
||||
"c": {
|
||||
"main": { "$": "auto_text_key", "code": 231, "label": "ç" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 269, "label": "č" },
|
||||
{ "$": "auto_text_key", "code": 263, "label": "ć" }
|
||||
]
|
||||
},
|
||||
"e": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 233, "label": "é" },
|
||||
{ "$": "auto_text_key", "code": 601, "label": "ə" },
|
||||
{ "$": "auto_text_key", "code": 234, "label": "ê" }
|
||||
]
|
||||
"main": { "$": "auto_text_key", "code": 231, "label": "ç" }
|
||||
},
|
||||
"g": {
|
||||
"main": { "$": "auto_text_key", "code": 287, "label": "ğ" }
|
||||
},
|
||||
"i": {
|
||||
"main": { "$": "auto_text_key", "code": 305, "label": "ı" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 303, "label": "į" },
|
||||
{ "$": "auto_text_key", "code": 236, "label": "ì" },
|
||||
{ "$": "auto_text_key", "code": 237, "label": "í" },
|
||||
{ "$": "auto_text_key", "code": 299, "label": "ī" },
|
||||
{ "$": "auto_text_key", "code": 238, "label": "î" },
|
||||
{ "$": "auto_text_key", "code": 239, "label": "ï" }
|
||||
]
|
||||
"main": { "$": "case_selector",
|
||||
"lower": { "code": 305, "label": "ı" },
|
||||
"upper": { "code": 73, "label": "I" }
|
||||
}
|
||||
},
|
||||
"ı": {
|
||||
"main": { "$": "auto_text_key", "code": 105, "label": "i" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 303, "label": "į" },
|
||||
{ "$": "auto_text_key", "code": 236, "label": "ì" },
|
||||
{ "$": "auto_text_key", "code": 237, "label": "í" },
|
||||
{ "$": "auto_text_key", "code": 299, "label": "ī" },
|
||||
{ "$": "auto_text_key", "code": 238, "label": "î" },
|
||||
{ "$": "auto_text_key", "code": 239, "label": "ï" }
|
||||
]
|
||||
},
|
||||
"n": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 328, "label": "ň" },
|
||||
{ "$": "auto_text_key", "code": 241, "label": "ñ" }
|
||||
]
|
||||
"main": { "$": "case_selector",
|
||||
"lower": { "code": 105, "label": "i" },
|
||||
"upper": { "code": 304, "label": "İ" }
|
||||
}
|
||||
},
|
||||
"o": {
|
||||
"main": { "$": "auto_text_key", "code": 246, "label": "ö" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 333, "label": "ō" },
|
||||
{ "$": "auto_text_key", "code": 248, "label": "ø" },
|
||||
{ "$": "auto_text_key", "code": 243, "label": "ó" },
|
||||
{ "$": "auto_text_key", "code": 245, "label": "õ" },
|
||||
{ "$": "auto_text_key", "code": 242, "label": "ò" },
|
||||
{ "$": "auto_text_key", "code": 339, "label": "œ" },
|
||||
{ "$": "auto_text_key", "code": 244, "label": "ô" }
|
||||
]
|
||||
"main": { "$": "auto_text_key", "code": 246, "label": "ö" }
|
||||
},
|
||||
"s": {
|
||||
"main": { "$": "auto_text_key", "code": 351, "label": "ş" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 347, "label": "ś" },
|
||||
{ "$": "auto_text_key", "code": 223, "label": "ß" },
|
||||
{ "$": "auto_text_key", "code": 353, "label": "š" }
|
||||
]
|
||||
"main": { "$": "auto_text_key", "code": 351, "label": "ş" }
|
||||
},
|
||||
"u": {
|
||||
"main": { "$": "auto_text_key", "code": 252, "label": "ü" },
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 363, "label": "ū" },
|
||||
{ "$": "auto_text_key", "code": 249, "label": "ù" },
|
||||
{ "$": "auto_text_key", "code": 250, "label": "ú" },
|
||||
{ "$": "auto_text_key", "code": 251, "label": "û" }
|
||||
]
|
||||
},
|
||||
"y": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 253, "label": "ý" }
|
||||
]
|
||||
},
|
||||
"z": {
|
||||
"relevant": [
|
||||
{ "$": "auto_text_key", "code": 382, "label": "ž" }
|
||||
]
|
||||
"main": { "$": "auto_text_key", "code": 252, "label": "ü" }
|
||||
},
|
||||
"~right": {
|
||||
"main": { "code": 44, "label": "," },
|
||||
@@ -120,10 +56,10 @@
|
||||
"~right": {
|
||||
"main": { "code": -255, "label": ".com" },
|
||||
"relevant": [
|
||||
{ "code": -255, "label": ".gov" },
|
||||
{ "code": -255, "label": ".gov.tr" },
|
||||
{ "code": -255, "label": ".org" },
|
||||
{ "code": -255, "label": ".edu" },
|
||||
{ "code": -255, "label": ".tr" },
|
||||
{ "code": -255, "label": ".edu.tr" },
|
||||
{ "code": -255, "label": ".com.tr" },
|
||||
{ "code": -255, "label": ".net" }
|
||||
]
|
||||
}
|
||||
|
||||
354
app/src/main/assets/ime/text/characters/jis.json
Normal file
354
app/src/main/assets/ime/text/characters/jis.json
Normal file
@@ -0,0 +1,354 @@
|
||||
{
|
||||
"type": "characters",
|
||||
"name": "jis",
|
||||
"label": "JIS",
|
||||
"authors": [ "waelwindows" ],
|
||||
"direction": "ltr",
|
||||
"modifier": "jis",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12396, "label": "ぬ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12492, "label": "ヌ" },
|
||||
"half": { "code": 65415, "label": "ヌ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12405, "label": "ふ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12501, "label": "フ" },
|
||||
"half": { "code": 65420, "label": "フ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12354, "label": "あ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12450, "label": "ア" },
|
||||
"half": { "code": 65393, "label": "ア" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12358, "label": "う" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12454, "label": "ウ" },
|
||||
"half": { "code": 65395, "label": "ウ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12360, "label": "え" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12456, "label": "エ" },
|
||||
"half": { "code": 65396, "label": "エ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12362, "label": "お" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12458, "label": "オ" },
|
||||
"half": { "code": 65397, "label": "オ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12420, "label": "や" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12516, "label": "ヤ" },
|
||||
"half": { "code": 65428, "label": "ヤ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12422, "label": "ゆ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12518, "label": "ユ" },
|
||||
"half": { "code": 65429, "label": "ユ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12424, "label": "よ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12520, "label": "ヨ" },
|
||||
"half": { "code": 65430, "label": "ヨ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12431, "label": "わ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12527, "label": "ワ" },
|
||||
"half": { "code": 65436, "label": "ワ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12411, "label": "ほ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12507, "label": "ホ" },
|
||||
"half": { "code": 65422, "label": "ホ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12408, "label": "へ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12504, "label": "ヘ" },
|
||||
"half": { "code": 65421, "label": "ヘ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12540, "label": "ー" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12540, "label": "ー" },
|
||||
"half": { "code": 65392, "label": "ー" }
|
||||
}
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12383, "label": "た" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12479, "label": "タ" },
|
||||
"half": { "code": 65408, "label": "タ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12390, "label": "て" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12486, "label": "テ" },
|
||||
"half": { "code": 65411, "label": "テ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12356, "label": "い" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12452, "label": "イ" },
|
||||
"half": { "code": 65394, "label": "イ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12377, "label": "す" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12473, "label": "ス" },
|
||||
"half": { "code": 65405, "label": "ス" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12363, "label": "か" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12459, "label": "カ" },
|
||||
"half": { "code": 65398, "label": "カ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12435, "label": "ん" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12531, "label": "ン" },
|
||||
"half": { "code": 65437, "label": "ン" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12394, "label": "な" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12490, "label": "ナ" },
|
||||
"half": { "code": 65413, "label": "ナ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12395, "label": "に" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12491, "label": "ニ" },
|
||||
"half": { "code": 65414, "label": "ニ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12425, "label": "ら" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12521, "label": "ラ" },
|
||||
"half": { "code": 65431, "label": "ラ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12379, "label": "せ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12475, "label": "セ" },
|
||||
"half": { "code": 65406, "label": "セ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12441, "label": "゛" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12441, "label": "゛" },
|
||||
"half": { "code": 65438, "label": "゙" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12442, "label": "゜" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12442, "label": "゜" },
|
||||
"half": { "code": 65439, "label": "゚" }
|
||||
}
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12385, "label": "ち" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12481, "label": "チ" },
|
||||
"half": { "code": 65409, "label": "チ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12392, "label": "と" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12488, "label": "ト" },
|
||||
"half": { "code": 65412, "label": "ト" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12375, "label": "し" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12471, "label": "シ" },
|
||||
"half": { "code": 65410, "label": "シ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12399, "label": "は" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12495, "label": "ハ" },
|
||||
"half": { "code": 65418, "label": "ハ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12365, "label": "き" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12461, "label": "キ" },
|
||||
"half": { "code": 65399, "label": "キ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12367, "label": "く" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12463, "label": "ク" },
|
||||
"half": { "code": 65400, "label": "ク" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12414, "label": "ま" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12510, "label": "マ" },
|
||||
"half": { "code": 65423, "label": "マ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12398, "label": "の" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12494, "label": "ノ" },
|
||||
"half": { "code": 65417, "label": "ノ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12426, "label": "り" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12522, "label": "リ" },
|
||||
"half": { "code": 65432, "label": "リ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12428, "label": "れ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12524, "label": "レ" },
|
||||
"half": { "code": 65434, "label": "レ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12369, "label": "け" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12465, "label": "ケ" },
|
||||
"half": { "code": 65401, "label": "ケ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12416, "label": "む" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12512, "label": "ム" },
|
||||
"half": { "code": 65425, "label": "ム" }
|
||||
}
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12388, "label": "つ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12484, "label": "ツ" },
|
||||
"half": { "code": 65410, "label": "ツ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12373, "label": "さ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12469, "label": "サ" },
|
||||
"half": { "code": 65403, "label": "サ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12381, "label": "そ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12477, "label": "ソ" },
|
||||
"half": { "code": 65407, "label": "ソ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12402, "label": "ひ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12498, "label": "ヒ" },
|
||||
"half": { "code": 65419, "label": "ヒ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12371, "label": "こ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12467, "label": "コ" },
|
||||
"half": { "code": 65402, "label": "コ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12415, "label": "み" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12511, "label": "ミ" },
|
||||
"half": { "code": 65424, "label": "ミ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12418, "label": "も" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12514, "label": "モ" },
|
||||
"half": { "code": 65427, "label": "モ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12397, "label": "ね" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12493, "label": "ネ" },
|
||||
"half": { "code": 65416, "label": "ネ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12427, "label": "る" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12523, "label": "ル" },
|
||||
"half": { "code": 65433, "label": "ル" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12417, "label": "め" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12513, "label": "メ" },
|
||||
"half": { "code": 65426, "label": "メ" }
|
||||
}
|
||||
},
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12429, "label": "ろ" },
|
||||
"kata": { "$": "char_width_selector",
|
||||
"full": { "code": 12525, "label": "ロ" },
|
||||
"half": { "code": 65435, "label": "ロ" }
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
29
app/src/main/assets/ime/text/characters/mod/jis.json
Normal file
29
app/src/main/assets/ime/text/characters/mod/jis.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"type": "characters/mod",
|
||||
"name": "jis",
|
||||
"label": "JIS",
|
||||
"authors": [ "waelwindows" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 0, "type": "placeholder" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": -202, "label": "view_symbols", "type": "system_gui" },
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 12289, "label": "、", "groupId": 1 },
|
||||
"half": { "code": 65380, "label": "、", "groupId": 1 }
|
||||
},
|
||||
{ "code": -210, "label": "language_switch", "type": "system_gui" },
|
||||
{ "code": -213, "label": "switch_to_media_context", "type": "system_gui" },
|
||||
{ "code": 12288, "label": "空白" },
|
||||
{ "code": -9710, "label": "かな", "groupId": 97, "type": "system_gui" },
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 12290, "label": "。", "groupId": 2 },
|
||||
"half": { "code": 65377, "label": "。", "groupId": 2 }
|
||||
},
|
||||
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
|
||||
]
|
||||
]
|
||||
}
|
||||
@@ -9,7 +9,10 @@
|
||||
{ "$": "auto_text_key", "code": 102, "label": "f" },
|
||||
{ "$": "auto_text_key", "code": 103, "label": "g" },
|
||||
{ "$": "auto_text_key", "code": 287, "label": "ğ" },
|
||||
{ "$": "auto_text_key", "code": 305, "label": "ı" },
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 305, "label": "ı" },
|
||||
"upper": { "code": 73, "label": "I" }
|
||||
},
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" },
|
||||
{ "$": "auto_text_key", "code": 100, "label": "d" },
|
||||
{ "$": "auto_text_key", "code": 114, "label": "r" },
|
||||
@@ -21,7 +24,10 @@
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" },
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 105, "label": "i" },
|
||||
"upper": { "code": 304, "label": "İ" }
|
||||
},
|
||||
{ "$": "auto_text_key", "code": 101, "label": "e" },
|
||||
{ "$": "auto_text_key", "code": 97, "label": "a" },
|
||||
{ "$": "auto_text_key", "code": 252, "label": "ü" },
|
||||
|
||||
@@ -13,7 +13,10 @@
|
||||
{ "$": "auto_text_key", "code": 116, "label": "t" },
|
||||
{ "$": "auto_text_key", "code": 121, "label": "y" },
|
||||
{ "$": "auto_text_key", "code": 117, "label": "u" },
|
||||
{ "$": "auto_text_key", "code": 305, "label": "ı" },
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 305, "label": "ı" },
|
||||
"upper": { "code": 73, "label": "I" }
|
||||
},
|
||||
{ "$": "auto_text_key", "code": 111, "label": "o" },
|
||||
{ "$": "auto_text_key", "code": 112, "label": "p" },
|
||||
{ "$": "auto_text_key", "code": 287, "label": "ğ" },
|
||||
@@ -30,7 +33,10 @@
|
||||
{ "$": "auto_text_key", "code": 107, "label": "k" },
|
||||
{ "$": "auto_text_key", "code": 108, "label": "l" },
|
||||
{ "$": "auto_text_key", "code": 351, "label": "ş" },
|
||||
{ "$": "auto_text_key", "code": 105, "label": "i" }
|
||||
{ "$": "case_selector",
|
||||
"lower": { "code": 105, "label": "i" },
|
||||
"upper": { "code": 304, "label": "İ" }
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "$": "auto_text_key", "code": 122, "label": "z" },
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"type": "numeric_advanced",
|
||||
"name": "western_arabic_pc",
|
||||
"label": "Western Arabic (PC)",
|
||||
"authors": [ "patrickgold" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 43, "label": "+", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 45, "label": "-" },
|
||||
{ "code": 42, "label": "*" },
|
||||
{ "code": 47, "label": "/" }
|
||||
]
|
||||
} },
|
||||
{ "code": 55, "label": "7", "type": "numeric" },
|
||||
{ "code": 56, "label": "8", "type": "numeric" },
|
||||
{ "code": 57, "label": "9", "type": "numeric" },
|
||||
{ "code": 37, "label": "%" }
|
||||
],
|
||||
[
|
||||
{ "code": 40, "label": "(", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 91, "label": "[" },
|
||||
{ "code": 123, "label": "{" }
|
||||
]
|
||||
} },
|
||||
{ "code": 52, "label": "4", "type": "numeric" },
|
||||
{ "code": 53, "label": "5", "type": "numeric" },
|
||||
{ "code": 54, "label": "6", "type": "numeric" },
|
||||
{ "code": 32, "label": "space" }
|
||||
],
|
||||
[
|
||||
{ "code": 41, "label": ")", "popup": {
|
||||
"relevant": [
|
||||
{ "code": 93, "label": "]" },
|
||||
{ "code": 125, "label": "}" }
|
||||
]
|
||||
} },
|
||||
{ "code": 49, "label": "1", "type": "numeric" },
|
||||
{ "code": 50, "label": "2", "type": "numeric" },
|
||||
{ "code": 51, "label": "3", "type": "numeric" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": -201, "label": "view_characters", "type": "system_gui" },
|
||||
{ "code": 44, "label": "," },
|
||||
{ "code": -202, "label": "view_symbols", "type": "system_gui" },
|
||||
{ "code": 48, "label": "0", "type": "numeric" },
|
||||
{ "code": 61, "label": "=" },
|
||||
{ "code": 46, "label": "." },
|
||||
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
|
||||
]
|
||||
]
|
||||
}
|
||||
108
app/src/main/assets/ime/text/numeric/row/cjk.json
Normal file
108
app/src/main/assets/ime/text/numeric/row/cjk.json
Normal file
@@ -0,0 +1,108 @@
|
||||
{
|
||||
"type": "numeric_row",
|
||||
"name": "cjk",
|
||||
"label": "CJK",
|
||||
"authors": [ "waelwindows" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 19968, "label": "一", "type": "numeric", "popup": {
|
||||
"main": { "code": 49, "label": "1" },
|
||||
"relevant": [
|
||||
{ "code": 22769, "label": "壱" },
|
||||
{ "code": 22777, "label": "壹" },
|
||||
{ "code": 24332, "label": "弌" },
|
||||
{ "code": 65297, "label": "1" }
|
||||
]
|
||||
} },
|
||||
{ "code": 20108, "label": "二", "type": "numeric", "popup": {
|
||||
"main": { "code": 50, "label": "2" },
|
||||
"relevant": [
|
||||
{ "code": 24336, "label": "弐" },
|
||||
{ "code": 36019, "label": "貳" },
|
||||
{ "code": 36014, "label": "貮" },
|
||||
{ "code": 65298, "label": "2" }
|
||||
]
|
||||
} },
|
||||
{ "code": 19977, "label": "三", "type": "numeric", "popup": {
|
||||
"main": { "code": 51, "label": "3" },
|
||||
"relevant": [
|
||||
{ "code": 21442, "label": "参" },
|
||||
{ "code": 21443, "label": "參" },
|
||||
{ "code": 24334, "label": "弎" },
|
||||
{ "code": 65299, "label": "3" }
|
||||
]
|
||||
} },
|
||||
{ "code": 22235, "label": "四", "type": "numeric", "popup": {
|
||||
"main": { "code": 52, "label": "4" },
|
||||
"relevant": [
|
||||
{ "code": 32902, "label": "肆" },
|
||||
{ "code": 18825, "label": "䦉" },
|
||||
{ "code": 20118, "label": "亖" },
|
||||
{ "code": 65300, "label": "3" }
|
||||
]
|
||||
} },
|
||||
{ "code": 20116, "label": "五", "type": "numeric", "popup": {
|
||||
"main": { "code": 53, "label": "5" },
|
||||
"relevant": [
|
||||
{ "code": 20237, "label": "伍" },
|
||||
{ "code": 65301, "label": "5" }
|
||||
]
|
||||
} },
|
||||
{ "code": 20845, "label": "六", "type": "numeric", "popup": {
|
||||
"main": { "code": 54, "label": "6" },
|
||||
"relevant": [
|
||||
{ "code": 38520, "label": "陸" },
|
||||
{ "code": 65302, "label": "6" }
|
||||
]
|
||||
} },
|
||||
{ "code": 19971, "label": "七", "type": "numeric", "popup": {
|
||||
"main": { "code": 55, "label": "7" },
|
||||
"relevant": [
|
||||
{ "code": 28422, "label": "漆" },
|
||||
{ "code": 26578, "label": "柒" },
|
||||
{ "code": 65303, "label": "7" }
|
||||
]
|
||||
} },
|
||||
{ "code": 20843, "label": "八", "type": "numeric", "popup": {
|
||||
"main": { "code": 56, "label": "8" },
|
||||
"relevant": [
|
||||
{ "code": 25420, "label": "捌" },
|
||||
{ "code": 65304, "label": "8" }
|
||||
]
|
||||
} },
|
||||
{ "code": 20061, "label": "九", "type": "numeric", "popup": {
|
||||
"main": {"code": 57, "label": "9" },
|
||||
"relevant": [
|
||||
{ "code": 29590, "label": "玖" },
|
||||
{ "code": 65305, "label": "9" }
|
||||
]
|
||||
} },
|
||||
{ "code": 38646, "label": "零", "type": "numeric", "popup": {
|
||||
"main": { "code": 48, "label": "0" },
|
||||
"relevant": [
|
||||
{ "code": 12295, "label": "〇" },
|
||||
{ "code": 65296, "label": "0" }
|
||||
]
|
||||
} },
|
||||
{ "code": 21313, "label": "十", "type": "numeric", "popup": {
|
||||
"main": { "code": 25342, "label": "拾" },
|
||||
"relevant": [
|
||||
{ "code": 20160, "label": "什" }
|
||||
]
|
||||
} },
|
||||
{ "code": 30334, "label": "百", "type": "numeric", "popup": {
|
||||
"main": { "code": 20336, "label": "佰" },
|
||||
"relevant": [
|
||||
{ "code": 38476, "label": "陌" }
|
||||
]
|
||||
} },
|
||||
{ "code": 21315, "label": "千", "type": "numeric", "popup": {
|
||||
"main": { "code": 20191, "label": "仟" },
|
||||
"relevant": [
|
||||
{ "code": 38433, "label": "阡" }
|
||||
]
|
||||
} }
|
||||
]
|
||||
]
|
||||
}
|
||||
35
app/src/main/assets/ime/text/numeric/western_arabic_pc.json
Normal file
35
app/src/main/assets/ime/text/numeric/western_arabic_pc.json
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"type": "numeric",
|
||||
"name": "western_arabic_pc",
|
||||
"label": "Western Arabic (PC)",
|
||||
"authors": [ "patrickgold" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": 55, "label": "7", "type": "numeric" },
|
||||
{ "code": 56, "label": "8", "type": "numeric" },
|
||||
{ "code": 57, "label": "9", "type": "numeric" },
|
||||
{ "code": 45, "label": "-" }
|
||||
],
|
||||
[
|
||||
{ "code": 52, "label": "4", "type": "numeric" },
|
||||
{ "code": 53, "label": "5", "type": "numeric" },
|
||||
{ "code": 54, "label": "6", "type": "numeric" },
|
||||
{ "code": 32, "label": "space" }
|
||||
],
|
||||
[
|
||||
{ "code": 49, "label": "1", "type": "numeric" },
|
||||
{ "code": 50, "label": "2", "type": "numeric" },
|
||||
{ "code": 51, "label": "3", "type": "numeric" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": 44, "label": "," },
|
||||
{ "code": 48, "label": "0", "type": "numeric", "popup": {
|
||||
"main": { "code": 43, "label": "+" }
|
||||
} },
|
||||
{ "code": 46, "label": "." },
|
||||
{ "code": 10, "label": "enter", "type": "enter_editing" }
|
||||
]
|
||||
]
|
||||
}
|
||||
283
app/src/main/assets/ime/text/symbols/cjk.json
Normal file
283
app/src/main/assets/ime/text/symbols/cjk.json
Normal file
@@ -0,0 +1,283 @@
|
||||
{
|
||||
"type": "symbols",
|
||||
"name": "cjk",
|
||||
"label": "CJK",
|
||||
"authors": [ "waelwindows" ],
|
||||
"direction": "ltr",
|
||||
"modifier": "cjk",
|
||||
"arrangement": [
|
||||
[
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65312, "label": "@", "popup": {
|
||||
"main": { "code": 64, "label": "@" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 64, "label": "@" }
|
||||
},
|
||||
{ "code": 12306, "label": "〒", "popup": {
|
||||
"main": { "code": 12320, "label": "〠" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65283, "label": "#", "popup": {
|
||||
"main": { "code": 35, "label": "#" },
|
||||
"relevant": [
|
||||
{ "code": 8470, "label": "№" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 35, "label": "#", "popup": {
|
||||
"main": { "code": 65283, "label": "#" },
|
||||
"relevant": [
|
||||
{ "code": 8470, "label": "№" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "code": -801, "label": "currency_slot_1", "popup": {
|
||||
"main": { "code": -802, "label": "currency_slot_2" },
|
||||
"relevant": [
|
||||
{ "code": -806, "label": "currency_slot_6" },
|
||||
{ "code": -803, "label": "currency_slot_3" },
|
||||
{ "code": -804, "label": "currency_slot_4" },
|
||||
{ "code": -805, "label": "currency_slot_5" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65285, "label": "%", "popup": {
|
||||
"main": { "code": 37, "label": "%" },
|
||||
"relevant": [
|
||||
{ "code": 8240, "label": "‰" },
|
||||
{ "code": 8453, "label": "℅" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 37, "label": "%", "popup": {
|
||||
"main": { "code": 65285, "label": "%" },
|
||||
"relevant": [
|
||||
{ "code": 8240, "label": "‰" },
|
||||
{ "code": 8453, "label": "℅" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65286, "label": "&", "popup": {
|
||||
"main": { "code": 38, "label": "&" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 38, "label": "&", "popup": {
|
||||
"main": { "code": 65286, "label": "&" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65293, "label": "-", "popup": {
|
||||
"main": { "code": 65343, "label": "_" },
|
||||
"relevant": [
|
||||
{ "code": 45, "label": "-" },
|
||||
{ "code": 95, "label": "_" },
|
||||
{ "code": 8212, "label": "—" },
|
||||
{ "code": 8211, "label": "–" },
|
||||
{ "code": 183, "label": "·" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 45, "label": "-", "popup": {
|
||||
"main": { "code": 95, "label": "_" },
|
||||
"relevant": [
|
||||
{ "code": 65293, "label": "-" },
|
||||
{ "code": 65343, "label": "_" },
|
||||
{ "code": 8212, "label": "—" },
|
||||
{ "code": 8211, "label": "–" },
|
||||
{ "code": 183, "label": "·" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65291, "label": "+", "popup": {
|
||||
"main": { "code": 43, "label": "+" },
|
||||
"relevant": [
|
||||
{ "code": 177, "label": "±" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 43, "label": "+", "popup": {
|
||||
"main": { "code": 65291, "label": "+" },
|
||||
"relevant": [
|
||||
{ "code": 177, "label": "±" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 12300, "label": "「", "popup": {
|
||||
"main": { "code": 12302, "label": "『" },
|
||||
"relevant": [
|
||||
{ "code": 12304, "label": "【" },
|
||||
{ "code": 12310, "label": "〖" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 65378, "label": "「", "popup": {
|
||||
"main": { "code": 12301, "label": "」" },
|
||||
"relevant": [
|
||||
{ "code": 12303, "label": "』" },
|
||||
{ "code": 12304, "label": "【" },
|
||||
{ "code": 12310, "label": "〖" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 12301, "label": "」", "popup": {
|
||||
"main": { "code": 12303, "label": "』" },
|
||||
"relevant": [
|
||||
{ "code": 12305, "label": "】" },
|
||||
{ "code": 12311, "label": "〗" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 65379, "label": "」", "popup": {
|
||||
"main": { "code": 12301, "label": "」" },
|
||||
"relevant": [
|
||||
{ "code": 12303, "label": "』" },
|
||||
{ "code": 12305, "label": "】" },
|
||||
{ "code": 12311, "label": "〗" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65295, "label": "/", "popup": {
|
||||
"main": { "code": 47, "label": "/" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 47, "label": "/", "popup": {
|
||||
"main": { "code": 65295, "label": "/" }
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65290, "label": "*", "popup": {
|
||||
"main": { "code": 8251, "label": "※" },
|
||||
"relevant": [
|
||||
{ "code": 42, "label": "*" },
|
||||
{ "code": 8224, "label": "†" },
|
||||
{ "code": 9733, "label": "★" },
|
||||
{ "code": 8225, "label": "‡" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 42, "label": "*", "popup": {
|
||||
"main": { "code": 65290, "label": "*" },
|
||||
"relevant": [
|
||||
{ "code": 8251, "label": "※" },
|
||||
{ "code": 8224, "label": "†" },
|
||||
{ "code": 9733, "label": "★" },
|
||||
{ "code": 8225, "label": "‡" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "code": 34, "label": "\"", "popup": {
|
||||
"main": { "code": 8221, "label": "”" },
|
||||
"relevant": [
|
||||
{ "code": 8222, "label": "„" },
|
||||
{ "code": 8220, "label": "“" },
|
||||
{ "code": 171, "label": "«" },
|
||||
{ "code": 187, "label": "»" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{ "code": 39, "label": "'", "popup": {
|
||||
"main": { "code": 8217, "label": "’" },
|
||||
"relevant": [
|
||||
{ "code": 8218, "label": "‚" },
|
||||
{ "code": 8216, "label": "‘" },
|
||||
{ "code": 8249, "label": "‹" },
|
||||
{ "code": 8250, "label": "›" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65306, "label": ":", "popup": {
|
||||
"main": { "code": 58, "label": ":" },
|
||||
"relevant": [
|
||||
{ "code": 8942, "label": "⋮" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 58, "label": ":", "popup": {
|
||||
"main": { "code": 65306, "label": ":" },
|
||||
"relevant": [
|
||||
{ "code": 8942, "label": "⋮" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65307, "label": ";", "popup": {
|
||||
"main": { "code": 59, "label": ";" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 59, "label": ";", "popup": {
|
||||
"main": { "code": 65307, "label": ";" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65281, "label": "!", "popup": {
|
||||
"main": { "code": 33, "label": "!" },
|
||||
"relevant": [
|
||||
{ "code": 161, "label": "¡" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 33, "label": "!", "popup": {
|
||||
"main": { "code": 65281, "label": "!" },
|
||||
"relevant": [
|
||||
{ "code": 161, "label": "¡" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$": "char_width_selector",
|
||||
"full": { "code": 65311, "label": "?", "popup": {
|
||||
"main": { "code": 63, "label": "?" },
|
||||
"relevant": [
|
||||
{ "code": 191, "label": "¿" },
|
||||
{ "code": 8253, "label": "‽" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 63, "label": "?", "popup": {
|
||||
"main": { "code": 65311, "label": "?" },
|
||||
"relevant": [
|
||||
{ "code": 191, "label": "¿" },
|
||||
{ "code": 8253, "label": "‽" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
55
app/src/main/assets/ime/text/symbols/mod/cjk.json
Normal file
55
app/src/main/assets/ime/text/symbols/mod/cjk.json
Normal file
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"type": "symbols/mod",
|
||||
"name": "cjk",
|
||||
"label": "CJK",
|
||||
"authors": [ "waelwindows" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": -203, "label": "view_symbols2", "type": "system_gui" },
|
||||
{ "code": 0, "type": "placeholder" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": -201, "label": "view_characters", "type": "system_gui" },
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 12289, "label": "、", "popup": {
|
||||
"main": { "code": 44, "label": "," }
|
||||
}
|
||||
},
|
||||
"half": { "code": 65380, "label": "、", "popup": {
|
||||
"main": { "code": 44, "label": "," }
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "code": -205, "label": "view_numeric_advanced", "type": "system_gui" },
|
||||
{ "code": 12288, "label": "空白" },
|
||||
{ "code": -9701, "label": "char_width_switcher", "type": "system_gui", "popup": {
|
||||
"relevant": [
|
||||
{ "code": -9702, "label": "char_width_full", "type": "system_gui" },
|
||||
{ "code": -9703, "label": "char_width_half", "type": "system_gui" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 12290, "label": "。", "popup": {
|
||||
"main": { "code": 8230, "label": "…" },
|
||||
"relevant": [
|
||||
{ "code": 12539, "label": "・" },
|
||||
{ "code": 46, "label": "." }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 65377, "label": "。", "popup": {
|
||||
"main": { "code": 8230, "label": "…" },
|
||||
"relevant": [
|
||||
{ "code": 65381, "label": "・" },
|
||||
{ "code": 46, "label": "." }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
|
||||
]
|
||||
]
|
||||
}
|
||||
225
app/src/main/assets/ime/text/symbols2/cjk.json
Normal file
225
app/src/main/assets/ime/text/symbols2/cjk.json
Normal file
@@ -0,0 +1,225 @@
|
||||
{
|
||||
"type": "symbols2",
|
||||
"name": "cjk",
|
||||
"label": "CJK",
|
||||
"authors": [ "waelwindows" ],
|
||||
"direction": "ltr",
|
||||
"modifier": "cjk",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 12316, "label": "〜", "popup": {
|
||||
"main": { "code": 126, "label": "~" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 126, "label": "~", "popup": {
|
||||
"main": { "code": 12316, "label": "〜" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65344, "label": "`", "popup": {
|
||||
"main": { "code": 96, "label": "`" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 96, "label": "`", "popup": {
|
||||
"main": { "code": 65344, "label": "`" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65372, "label": "|", "popup": {
|
||||
"main": { "code": 124, "label": "|" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 124, "label": "|", "popup": {
|
||||
"main": { "code": 65372, "label": "|" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full":
|
||||
{ "code": 12539, "label": "・", "popup": {
|
||||
"main": { "code": 9834, "label": "♪" },
|
||||
"relevant": [
|
||||
{ "code": 8226, "label": "•" },
|
||||
{ "code": 9827, "label": "♣" },
|
||||
{ "code": 9824, "label": "♠" },
|
||||
{ "code": 9829, "label": "♥" },
|
||||
{ "code": 9830, "label": "♦" }
|
||||
]
|
||||
} },
|
||||
"half":
|
||||
{ "code": 8226, "label": "•", "popup": {
|
||||
"main": { "code": 9834, "label": "♪" },
|
||||
"relevant": [
|
||||
{ "code": 12539, "label": "・" },
|
||||
{ "code": 9827, "label": "♣" },
|
||||
{ "code": 9824, "label": "♠" },
|
||||
{ "code": 9829, "label": "♥" },
|
||||
{ "code": 9830, "label": "♦" }
|
||||
]
|
||||
} }
|
||||
},
|
||||
{ "code": 8730, "label": "√" },
|
||||
{ "code": 960, "label": "π", "popup": {
|
||||
"main": { "code": 928, "label": "Π" },
|
||||
"relevant": [
|
||||
{ "code": 969, "label": "ω" },
|
||||
{ "code": 945, "label": "α" },
|
||||
{ "code": 946, "label": "β" },
|
||||
{ "code": 937, "label": "Ω" },
|
||||
{ "code": 956, "label": "μ" }
|
||||
]
|
||||
} },
|
||||
{ "code": 247, "label": "÷" },
|
||||
{ "code": 215, "label": "×" },
|
||||
{ "code": 182, "label": "¶" },
|
||||
{ "code": 8710, "label": "∆" }
|
||||
],
|
||||
[
|
||||
{ "code": -805, "label": "currency_slot_5" },
|
||||
{ "code": -804, "label": "currency_slot_4" },
|
||||
{ "code": -803, "label": "currency_slot_3" },
|
||||
{ "code": -802, "label": "currency_slot_2" },
|
||||
{ "code": 94, "label": "^", "popup": {
|
||||
"main": { "code": 8593, "label": "↑" },
|
||||
"relevant": [
|
||||
{ "code": 8592, "label": "←" },
|
||||
{ "code": 8595, "label": "↓" },
|
||||
{ "code": 8594, "label": "→" }
|
||||
]
|
||||
} },
|
||||
{ "code": 176, "label": "°", "popup": {
|
||||
"main": { "code": 8242, "label": "′" },
|
||||
"relevant": [
|
||||
{ "code": 8243, "label": "″" }
|
||||
]
|
||||
} },
|
||||
{ "$": "char_width_selector",
|
||||
"full":
|
||||
{ "code": 65309, "label": "=", "popup": {
|
||||
"main": { "code": 8800, "label": "≠" },
|
||||
"relevant": [
|
||||
{ "code": 61, "label": "=" },
|
||||
{ "code": 8734, "label": "∞" },
|
||||
{ "code": 8776, "label": "≈" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half":
|
||||
{ "code": 61, "label": "=", "popup": {
|
||||
"main": { "code": 8800, "label": "≠" },
|
||||
"relevant": [
|
||||
{ "code": 61, "label": "=" },
|
||||
{ "code": 8734, "label": "∞" },
|
||||
{ "code": 8776, "label": "≈" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65371, "label": "{", "popup": {
|
||||
"main": { "code": 65288, "label": "(" },
|
||||
"relevant": [
|
||||
{ "code": 123, "label": "{" },
|
||||
{ "code": 12308, "label": "〔" },
|
||||
{ "code": 12312, "label": "〘" },
|
||||
{ "code": 65375, "label": "⦅" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 123, "label": "{", "popup": {
|
||||
"main": { "code": 40, "label": "(" },
|
||||
"relevant": [
|
||||
{ "code": 65371, "label": "{" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65373, "label": "}", "popup": {
|
||||
"main": { "code": 65289, "label": ")" },
|
||||
"relevant": [
|
||||
{ "code": 125, "label": "}" },
|
||||
{ "code": 12309, "label": "〕" },
|
||||
{ "code": 12313, "label": "〙" },
|
||||
{ "code": 65376, "label": "⦆" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 125, "label": "}", "popup": {
|
||||
"main": { "code": 41, "label": ")" },
|
||||
"relevant": [
|
||||
{ "code": 65373, "label": "}" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65340, "label": "\", "popup": {
|
||||
"main": { "code": 92, "label": "\\" }
|
||||
}
|
||||
},
|
||||
"half": { "code": 92, "label": "\\", "popup": {
|
||||
"main": { "code": 65340, "label": "\" }
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
[
|
||||
{ "code": 12292, "label": "〄" },
|
||||
{ "code": 12293, "label": "々" },
|
||||
{ "code": 12294, "label": "〆" },
|
||||
{ "code": 12295, "label": "〇" },
|
||||
{ "$": "kana_selector",
|
||||
"hira": { "code": 12445, "label": "ゝ", "popup": {
|
||||
"main": { "code": 12446, "label": "ゞ" },
|
||||
"relevant": [
|
||||
{ "code": 12541, "label": "ヽ" },
|
||||
{ "code": 12542, "label": "ヾ" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"kata": { "code": 12541, "label": "ヽ", "popup": {
|
||||
"main": { "code": 12542, "label": "ヾ" },
|
||||
"relevant": [
|
||||
{ "code": 12445, "label": "ゝ" },
|
||||
{ "code": 12446, "label": "ゞ" }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65339, "label": "[", "popup": {
|
||||
"main": { "code": 91, "label": "[" },
|
||||
"relevant": [
|
||||
{ "code": 12314, "label": "〚" },
|
||||
{ "code": 12304, "label": "【" },
|
||||
{ "code": 12310, "label": "〖" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 91, "label": "[", "popup": {
|
||||
"main": { "code": 65339, "label": "[" }
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "$": "char_width_selector",
|
||||
"full": { "code": 65341, "label": "]", "popup": {
|
||||
"main": { "code": 93, "label": "]" },
|
||||
"relevant": [
|
||||
{ "code": 12315, "label": "〛" },
|
||||
{ "code": 12305, "label": "】" },
|
||||
{ "code": 12311, "label": "〗" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"half": { "code": 93, "label": "]", "popup": {
|
||||
"main": { "code": 65341, "label": "]" }
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
45
app/src/main/assets/ime/text/symbols2/mod/cjk.json
Normal file
45
app/src/main/assets/ime/text/symbols2/mod/cjk.json
Normal file
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"type": "symbols2/mod",
|
||||
"name": "cjk",
|
||||
"label": "CJK",
|
||||
"authors": [ "waelwindows" ],
|
||||
"direction": "ltr",
|
||||
"arrangement": [
|
||||
[
|
||||
{ "code": -202, "label": "view_symbols", "type": "system_gui" },
|
||||
{ "code": 0, "type": "placeholder" },
|
||||
{ "code": -5, "label": "delete", "type": "enter_editing" }
|
||||
],
|
||||
[
|
||||
{ "code": -201, "label": "view_characters", "type": "system_gui" },
|
||||
{ "code": 12296, "label": "〈", "popup": {
|
||||
"main": { "code": 12298, "label": "《" },
|
||||
"relevant": [
|
||||
{ "code": 8804, "label": "≤" },
|
||||
{ "code": 8249, "label": "‹" },
|
||||
{ "code": 10216, "label": "⟨" },
|
||||
{ "code": 65308, "label": "<" }
|
||||
]
|
||||
} },
|
||||
{ "code": -205, "label": "view_numeric_advanced", "type": "system_gui" },
|
||||
{ "code": 12288, "label": "空白" },
|
||||
{ "code": -9701, "label": "char_width_switcher", "type": "system_gui", "popup": {
|
||||
"relevant": [
|
||||
{ "code": -9702, "label": "char_width_full", "type": "system_gui" },
|
||||
{ "code": -9703, "label": "char_width_half", "type": "system_gui" }
|
||||
]
|
||||
}
|
||||
},
|
||||
{ "code": 12297, "label": "〉", "popup": {
|
||||
"main": { "code": 12299, "label": "》" },
|
||||
"relevant": [
|
||||
{ "code": 62, "label": ">" },
|
||||
{ "code": 8805, "label": "≥" },
|
||||
{ "code": 10217, "label": "⟩" },
|
||||
{ "code": 65310, "label": ">" }
|
||||
]
|
||||
} },
|
||||
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
|
||||
]
|
||||
]
|
||||
}
|
||||
@@ -1130,680 +1130,171 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
<h3>Nuspell</h3>
|
||||
<span>Copyright (c) 2021 Nuspell</span>
|
||||
<pre>
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. http://fsf.org/
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year>  <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see http://www.gnu.org/licenses/.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program>  Copyright (C) <year>  <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
http://www.gnu.org/licenses/.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
http://www.gnu.org/philosophy/why-not-lgpl.html.
|
||||
|
||||
This version of the GNU Lesser General Public License incorporates
|
||||
the terms and conditions of version 3 of the GNU General Public
|
||||
License, supplemented by the additional permissions listed below.
|
||||
|
||||
0. Additional Definitions.
|
||||
|
||||
As used herein, "this License" refers to version 3 of the GNU Lesser
|
||||
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
||||
General Public License.
|
||||
|
||||
"The Library" refers to a covered work governed by this License,
|
||||
other than an Application or a Combined Work as defined below.
|
||||
|
||||
An "Application" is any work that makes use of an interface provided
|
||||
by the Library, but which is not otherwise based on the Library.
|
||||
Defining a subclass of a class defined by the Library is deemed a mode
|
||||
of using an interface provided by the Library.
|
||||
|
||||
A "Combined Work" is a work produced by combining or linking an
|
||||
Application with the Library. The particular version of the Library
|
||||
with which the Combined Work was made is also called the "Linked
|
||||
Version".
|
||||
|
||||
The "Minimal Corresponding Source" for a Combined Work means the
|
||||
Corresponding Source for the Combined Work, excluding any source code
|
||||
for portions of the Combined Work that, considered in isolation, are
|
||||
based on the Application, and not on the Linked Version.
|
||||
|
||||
The "Corresponding Application Code" for a Combined Work means the
|
||||
object code and/or source code for the Application, including any data
|
||||
and utility programs needed for reproducing the Combined Work from the
|
||||
Application, but excluding the System Libraries of the Combined Work.
|
||||
|
||||
1. Exception to Section 3 of the GNU GPL.
|
||||
|
||||
You may convey a covered work under sections 3 and 4 of this License
|
||||
without being bound by section 3 of the GNU GPL.
|
||||
|
||||
2. Conveying Modified Versions.
|
||||
|
||||
If you modify a copy of the Library, and, in your modifications, a
|
||||
facility refers to a function or data to be supplied by an Application
|
||||
that uses the facility (other than as an argument passed when the
|
||||
facility is invoked), then you may convey a copy of the modified
|
||||
version:
|
||||
|
||||
a) under this License, provided that you make a good faith effort to
|
||||
ensure that, in the event an Application does not supply the
|
||||
function or data, the facility still operates, and performs
|
||||
whatever part of its purpose remains meaningful, or
|
||||
|
||||
b) under the GNU GPL, with none of the additional permissions of
|
||||
this License applicable to that copy.
|
||||
|
||||
3. Object Code Incorporating Material from Library Header Files.
|
||||
|
||||
The object code form of an Application may incorporate material from
|
||||
a header file that is part of the Library. You may convey such object
|
||||
code under terms of your choice, provided that, if the incorporated
|
||||
material is not limited to numerical parameters, data structure
|
||||
layouts and accessors, or small macros, inline functions and templates
|
||||
(ten or fewer lines in length), you do both of the following:
|
||||
|
||||
a) Give prominent notice with each copy of the object code that the
|
||||
Library is used in it and that the Library and its use are
|
||||
covered by this License.
|
||||
|
||||
b) Accompany the object code with a copy of the GNU GPL and this license
|
||||
document.
|
||||
|
||||
4. Combined Works.
|
||||
|
||||
You may convey a Combined Work under terms of your choice that,
|
||||
taken together, effectively do not restrict modification of the
|
||||
portions of the Library contained in the Combined Work and reverse
|
||||
engineering for debugging such modifications, if you also do each of
|
||||
the following:
|
||||
|
||||
a) Give prominent notice with each copy of the Combined Work that
|
||||
the Library is used in it and that the Library and its use are
|
||||
covered by this License.
|
||||
|
||||
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
||||
document.
|
||||
|
||||
c) For a Combined Work that displays copyright notices during
|
||||
execution, include the copyright notice for the Library among
|
||||
these notices, as well as a reference directing the user to the
|
||||
copies of the GNU GPL and this license document.
|
||||
|
||||
d) Do one of the following:
|
||||
|
||||
0) Convey the Minimal Corresponding Source under the terms of this
|
||||
License, and the Corresponding Application Code in a form
|
||||
suitable for, and under terms that permit, the user to
|
||||
recombine or relink the Application with a modified version of
|
||||
the Linked Version to produce a modified Combined Work, in the
|
||||
manner specified by section 6 of the GNU GPL for conveying
|
||||
Corresponding Source.
|
||||
|
||||
1) Use a suitable shared library mechanism for linking with the
|
||||
Library. A suitable mechanism is one that (a) uses at run time
|
||||
a copy of the Library already present on the user's computer
|
||||
system, and (b) will operate properly with a modified version
|
||||
of the Library that is interface-compatible with the Linked
|
||||
Version.
|
||||
|
||||
e) Provide Installation Information, but only if you would otherwise
|
||||
be required to provide such information under section 6 of the
|
||||
GNU GPL, and only to the extent that such information is
|
||||
necessary to install and execute a modified version of the
|
||||
Combined Work produced by recombining or relinking the
|
||||
Application with a modified version of the Linked Version. (If
|
||||
you use option 4d0, the Installation Information must accompany
|
||||
the Minimal Corresponding Source and Corresponding Application
|
||||
Code. If you use option 4d1, you must provide the Installation
|
||||
Information in the manner specified by section 6 of the GNU GPL
|
||||
for conveying Corresponding Source.)
|
||||
|
||||
5. Combined Libraries.
|
||||
|
||||
You may place library facilities that are a work based on the
|
||||
Library side by side in a single library together with other library
|
||||
facilities that are not Applications and are not covered by this
|
||||
License, and convey such a combined library under terms of your
|
||||
choice, if you do both of the following:
|
||||
|
||||
a) Accompany the combined library with a copy of the same work based
|
||||
on the Library, uncombined with any other library facilities,
|
||||
conveyed under the terms of this License.
|
||||
|
||||
b) Give prominent notice with the combined library that part of it
|
||||
is a work based on the Library, and explaining where to find the
|
||||
accompanying uncombined form of the same work.
|
||||
|
||||
6. Revised Versions of the GNU Lesser General Public License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions
|
||||
of the GNU Lesser General Public License from time to time. Such new
|
||||
versions will be similar in spirit to the present version, but may
|
||||
differ in detail to address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Library as you received it specifies that a certain numbered version
|
||||
of the GNU Lesser General Public License "or any later version"
|
||||
applies to it, you have the option of following the terms and
|
||||
conditions either of that published version or of any later version
|
||||
published by the Free Software Foundation. If the Library as you
|
||||
received it does not specify a version number of the GNU Lesser
|
||||
General Public License, you may choose any version of the GNU Lesser
|
||||
General Public License ever published by the Free Software Foundation.
|
||||
|
||||
If the Library as you received it specifies that a proxy can decide
|
||||
whether future versions of the GNU Lesser General Public License shall
|
||||
apply, that proxy's public statement of acceptance of any version is
|
||||
permanent authorization for you to choose that version for the
|
||||
Library.
|
||||
</pre>
|
||||
|
||||
<h3>Timber</h3>
|
||||
|
||||
@@ -10,21 +10,15 @@ set(CMAKE_CXX_STANDARD 17)
|
||||
include_directories(.)
|
||||
|
||||
### ICU4C ###
|
||||
include_directories(icu4c/include)
|
||||
#link_directories(${CMAKE_SOURCE_DIR}/../${ANDROID_ABI})
|
||||
set(JNI_LIBS ${CMAKE_SOURCE_DIR}/../jniLibs/${ANDROID_ABI})
|
||||
include_directories(../icu4c/prebuilt/include)
|
||||
set(JNI_LIBS ${CMAKE_SOURCE_DIR}/../icu4c/prebuilt/jniLibs/${ANDROID_ABI})
|
||||
add_library(ICU::data STATIC IMPORTED)
|
||||
set_property(TARGET ICU::data PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicudata.a")
|
||||
#add_library(ICU::i18n STATIC IMPORTED)
|
||||
#set_property(TARGET ICU::i18n PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicui18n.a")
|
||||
#add_library(ICU::tu STATIC IMPORTED)
|
||||
#set_property(TARGET ICU::tu PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicutu.a")
|
||||
add_library(ICU::uc STATIC IMPORTED)
|
||||
set_property(TARGET ICU::uc PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicuuc.a")
|
||||
|
||||
### FlorisBoard ###
|
||||
add_subdirectory(nuspell)
|
||||
add_subdirectory(glob_ndk)
|
||||
add_subdirectory(utils)
|
||||
add_subdirectory(ime/nlp)
|
||||
add_subdirectory(ime/spelling)
|
||||
@@ -32,6 +26,7 @@ add_subdirectory(ime/spelling)
|
||||
add_library(
|
||||
florisboard-native
|
||||
SHARED
|
||||
dev_patrickgold_florisboard_FlorisApplication.cpp
|
||||
dev_patrickgold_florisboard_ime_nlp_SuggestionList.cpp
|
||||
dev_patrickgold_florisboard_ime_spelling_SpellingDict.cpp
|
||||
)
|
||||
@@ -44,7 +39,6 @@ target_link_libraries(
|
||||
# Sources
|
||||
android
|
||||
log
|
||||
glob_ndk
|
||||
ICU::uc
|
||||
ICU::data
|
||||
Nuspell::nuspell
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <jni.h>
|
||||
#include <unicode/udata.h>
|
||||
#include "utils/jni_utils.h"
|
||||
|
||||
#pragma ide diagnostic ignored "UnusedLocalVariable"
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jint JNICALL
|
||||
Java_dev_patrickgold_florisboard_FlorisApplication_00024Companion_nativeInitICUData(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jobject path) {
|
||||
auto path_str = utils::j2std_string(env, path);
|
||||
std::ifstream in_file(path_str, std::ios::in | std::ios::binary);
|
||||
if (!in_file) {
|
||||
return U_FILE_ACCESS_ERROR;
|
||||
}
|
||||
in_file.seekg(0, std::ios::end);
|
||||
size_t size = in_file.tellg();
|
||||
if (size <= 0) {
|
||||
return U_FILE_ACCESS_ERROR;
|
||||
}
|
||||
in_file.seekg(0, std::ios::beg);
|
||||
char *icu_data = new char[size + 1];
|
||||
in_file.read(icu_data, size);
|
||||
if (!in_file) {
|
||||
in_file.close();
|
||||
return U_FILE_ACCESS_ERROR;
|
||||
}
|
||||
icu_data[size] = 0;
|
||||
in_file.close();
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
udata_setCommonData(reinterpret_cast<void *>(icu_data), &status);
|
||||
return status;
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
add_library(glob_ndk
|
||||
glob_ndk.c glob_ndk.h)
|
||||
@@ -1,906 +0,0 @@
|
||||
/*
|
||||
* Natanael Arndt, 2011: removed collate.h dependencies
|
||||
* (my changes are trivial)
|
||||
*
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Guido van Rossum.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/*
|
||||
* glob(3) -- a superset of the one defined in POSIX 1003.2.
|
||||
*
|
||||
* The [!...] convention to negate a range is supported (SysV, Posix, ksh).
|
||||
*
|
||||
* Optional extra services, controlled by flags not defined by POSIX:
|
||||
*
|
||||
* GLOB_QUOTE:
|
||||
* Escaping convention: \ inhibits any special meaning the following
|
||||
* character might have (except \ at end of string is retained).
|
||||
* GLOB_MAGCHAR:
|
||||
* Set in gl_flags if pattern contained a globbing character.
|
||||
* GLOB_NOMAGIC:
|
||||
* Same as GLOB_NOCHECK, but it will only append pattern if it did
|
||||
* not contain any magic characters. [Used in csh style globbing]
|
||||
* GLOB_ALTDIRFUNC:
|
||||
* Use alternately specified directory access functions.
|
||||
* GLOB_TILDE:
|
||||
* expand ~user/foo to the /home/dir/of/user/foo
|
||||
* GLOB_BRACE:
|
||||
* expand {1,2}{a,b} to 1a 1b 2a 2b
|
||||
* gl_matchc:
|
||||
* Number of matches in the current invocation of glob.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Some notes on multibyte character support:
|
||||
* 1. Patterns with illegal byte sequences match nothing - even if
|
||||
* GLOB_NOCHECK is specified.
|
||||
* 2. Illegal byte sequences in filenames are handled by treating them as
|
||||
* single-byte characters with a value of the first byte of the sequence
|
||||
* cast to wchar_t.
|
||||
* 3. State-dependent encodings are not currently supported.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include "glob_ndk.h"
|
||||
#include <limits.h>
|
||||
#include <pwd.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/* Literal characters the pattern compiler and matcher look for. */
#define DOLLAR      '$'
#define DOT         '.'
#define EOS         '\0'
#define LBRACKET    '['
#define NOT         '!'
#define QUESTION    '?'
#define QUOTE       '\\'
#define RANGE       '-'
#define RBRACKET    ']'
#define SEP         '/'
#define STAR        '*'
#define TILDE       '~'
#define UNDERSCORE  '_'
#define LBRACE      '{'
#define RBRACE      '}'
#define SLASH       '/'
#define COMMA       ','

/*
 * Internal pattern characters are stored as Char. The bits above the
 * character value carry flags: M_QUOTE marks a compiled meta character,
 * M_PROTECT marks a character that was backslash-quoted in the input.
 * DEBUG builds use a plain char with narrower masks.
 */
#ifndef DEBUG

#define M_QUOTE     0x8000000000ULL
#define M_PROTECT   0x4000000000ULL
#define M_MASK      0xffffffffffULL
#define M_CHAR      0x00ffffffffULL

typedef uint_fast64_t Char;

#else

#define M_QUOTE     0x80
#define M_PROTECT   0x40
#define M_MASK      0xff
#define M_CHAR      0x7f

typedef char Char;

#endif

/* CHAR() strips the flag bits; META() turns a character into a meta token. */
#define CHAR(c)     ((Char)((c)&M_CHAR))
#define META(c)     ((Char)((c)|M_QUOTE))
#define M_ALL       META('*')
#define M_END       META(']')
#define M_NOT       META('!')
#define M_ONE       META('?')
#define M_RNG       META('-')
#define M_SET       META('[')
#define ismeta(c)   (((c)&M_QUOTE) != 0)
|
||||
|
||||
|
||||
static int compare(const void *, const void *);
|
||||
static int g_Ctoc(const Char *, char *, size_t);
|
||||
static int g_lstat(Char *, struct stat *, glob_t *);
|
||||
static DIR *g_opendir(Char *, glob_t *);
|
||||
static const Char *g_strchr(const Char *, wchar_t);
|
||||
#ifdef notdef
|
||||
static Char *g_strcat(Char *, const Char *);
|
||||
#endif
|
||||
static int g_stat(Char *, struct stat *, glob_t *);
|
||||
static int glob0(const Char *, glob_t *, size_t *);
|
||||
static int glob1(Char *, glob_t *, size_t *);
|
||||
static int glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
|
||||
static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
|
||||
static int globextend(const Char *, glob_t *, size_t *);
|
||||
static const Char *
|
||||
globtilde(const Char *, Char *, size_t, glob_t *);
|
||||
static int globexp1(const Char *, glob_t *, size_t *);
|
||||
static int globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
|
||||
static int match(Char *, Char *, Char *);
|
||||
#ifdef DEBUG
|
||||
static void qprintf(const char *, Char *);
|
||||
#endif
|
||||
|
||||
int
|
||||
glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
|
||||
{
|
||||
const char *patnext;
|
||||
size_t limit;
|
||||
Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
|
||||
mbstate_t mbs;
|
||||
wchar_t wc;
|
||||
size_t clen;
|
||||
|
||||
patnext = pattern;
|
||||
if (!(flags & GLOB_APPEND)) {
|
||||
pglob->gl_pathc = 0;
|
||||
pglob->gl_pathv = NULL;
|
||||
if (!(flags & GLOB_DOOFFS))
|
||||
pglob->gl_offs = 0;
|
||||
}
|
||||
if (flags & GLOB_LIMIT) {
|
||||
limit = pglob->gl_matchc;
|
||||
if (limit == 0)
|
||||
limit = ARG_MAX;
|
||||
} else
|
||||
limit = 0;
|
||||
pglob->gl_flags = flags & ~GLOB_MAGCHAR;
|
||||
pglob->gl_errfunc = errfunc;
|
||||
pglob->gl_matchc = 0;
|
||||
|
||||
bufnext = patbuf;
|
||||
bufend = bufnext + MAXPATHLEN - 1;
|
||||
if (flags & GLOB_NOESCAPE) {
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
while (bufend - bufnext >= MB_CUR_MAX) {
|
||||
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
|
||||
if (clen == (size_t)-1 || clen == (size_t)-2)
|
||||
return (GLOB_NOMATCH);
|
||||
else if (clen == 0)
|
||||
break;
|
||||
*bufnext++ = wc;
|
||||
patnext += clen;
|
||||
}
|
||||
} else {
|
||||
/* Protect the quoted characters. */
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
while (bufend - bufnext >= MB_CUR_MAX) {
|
||||
if (*patnext == QUOTE) {
|
||||
if (*++patnext == EOS) {
|
||||
*bufnext++ = QUOTE | M_PROTECT;
|
||||
continue;
|
||||
}
|
||||
prot = M_PROTECT;
|
||||
} else
|
||||
prot = 0;
|
||||
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
|
||||
if (clen == (size_t)-1 || clen == (size_t)-2)
|
||||
return (GLOB_NOMATCH);
|
||||
else if (clen == 0)
|
||||
break;
|
||||
*bufnext++ = wc | prot;
|
||||
patnext += clen;
|
||||
}
|
||||
}
|
||||
*bufnext = EOS;
|
||||
|
||||
if (flags & GLOB_BRACE)
|
||||
return globexp1(patbuf, pglob, &limit);
|
||||
else
|
||||
return glob0(patbuf, pglob, &limit);
|
||||
}
|
||||
|
||||
/*
|
||||
* Expand recursively a glob {} pattern. When there is no more expansion
|
||||
* invoke the standard globbing routine to glob the rest of the magic
|
||||
* characters
|
||||
*/
|
||||
static int
|
||||
globexp1(const Char *pattern, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
const Char* ptr = pattern;
|
||||
int rv;
|
||||
|
||||
/* Protect a single {}, for find(1), like csh */
|
||||
if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
|
||||
return glob0(pattern, pglob, limit);
|
||||
|
||||
while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
|
||||
if (!globexp2(ptr, pattern, pglob, &rv, limit))
|
||||
return rv;
|
||||
|
||||
return glob0(pattern, pglob, limit);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Recursive brace globbing helper. Tries to expand a single brace.
|
||||
* If it succeeds then it invokes globexp1 with the new pattern.
|
||||
* If it fails then it tries to glob the rest of the pattern and returns.
|
||||
*/
|
||||
static int
|
||||
globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit)
|
||||
{
|
||||
int i;
|
||||
Char *lm, *ls;
|
||||
const Char *pe, *pm, *pm1, *pl;
|
||||
Char patbuf[MAXPATHLEN];
|
||||
|
||||
/* copy part up to the brace */
|
||||
for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
|
||||
continue;
|
||||
*lm = EOS;
|
||||
ls = lm;
|
||||
|
||||
/* Find the balanced brace */
|
||||
for (i = 0, pe = ++ptr; *pe; pe++)
|
||||
if (*pe == LBRACKET) {
|
||||
/* Ignore everything between [] */
|
||||
for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
|
||||
continue;
|
||||
if (*pe == EOS) {
|
||||
/*
|
||||
* We could not find a matching RBRACKET.
|
||||
* Ignore and just look for RBRACE
|
||||
*/
|
||||
pe = pm;
|
||||
}
|
||||
}
|
||||
else if (*pe == LBRACE)
|
||||
i++;
|
||||
else if (*pe == RBRACE) {
|
||||
if (i == 0)
|
||||
break;
|
||||
i--;
|
||||
}
|
||||
|
||||
/* Non matching braces; just glob the pattern */
|
||||
if (i != 0 || *pe == EOS) {
|
||||
*rv = glob0(patbuf, pglob, limit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0, pl = pm = ptr; pm <= pe; pm++)
|
||||
switch (*pm) {
|
||||
case LBRACKET:
|
||||
/* Ignore everything between [] */
|
||||
for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
|
||||
continue;
|
||||
if (*pm == EOS) {
|
||||
/*
|
||||
* We could not find a matching RBRACKET.
|
||||
* Ignore and just look for RBRACE
|
||||
*/
|
||||
pm = pm1;
|
||||
}
|
||||
break;
|
||||
|
||||
case LBRACE:
|
||||
i++;
|
||||
break;
|
||||
|
||||
case RBRACE:
|
||||
if (i) {
|
||||
i--;
|
||||
break;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case COMMA:
|
||||
if (i && *pm == COMMA)
|
||||
break;
|
||||
else {
|
||||
/* Append the current string */
|
||||
for (lm = ls; (pl < pm); *lm++ = *pl++)
|
||||
continue;
|
||||
/*
|
||||
* Append the rest of the pattern after the
|
||||
* closing brace
|
||||
*/
|
||||
for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
|
||||
continue;
|
||||
|
||||
/* Expand the current pattern */
|
||||
#ifdef DEBUG
|
||||
qprintf("globexp2:", patbuf);
|
||||
#endif
|
||||
*rv = globexp1(patbuf, pglob, limit);
|
||||
|
||||
/* move after the comma, to the next string */
|
||||
pl = pm + 1;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
*rv = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* expand tilde from the passwd file.
|
||||
*/
|
||||
static const Char *
|
||||
globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
|
||||
{
|
||||
struct passwd *pwd;
|
||||
char *h;
|
||||
const Char *p;
|
||||
Char *b, *eb;
|
||||
|
||||
if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
|
||||
return pattern;
|
||||
|
||||
/*
|
||||
* Copy up to the end of the string or /
|
||||
*/
|
||||
eb = &patbuf[patbuf_len - 1];
|
||||
for (p = pattern + 1, h = (char *) patbuf;
|
||||
h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
|
||||
continue;
|
||||
|
||||
*h = EOS;
|
||||
|
||||
if (((char *) patbuf)[0] == EOS) {
|
||||
/*
|
||||
* handle a plain ~ or ~/ by expanding $HOME first (iff
|
||||
* we're not running setuid or setgid) and then trying
|
||||
* the password file
|
||||
*/
|
||||
if (issetugid() != 0 ||
|
||||
(h = getenv("HOME")) == NULL) {
|
||||
if (((h = getlogin()) != NULL &&
|
||||
(pwd = getpwnam(h)) != NULL) ||
|
||||
(pwd = getpwuid(getuid())) != NULL)
|
||||
h = pwd->pw_dir;
|
||||
else
|
||||
return pattern;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* Expand a ~user
|
||||
*/
|
||||
if ((pwd = getpwnam((char*) patbuf)) == NULL)
|
||||
return pattern;
|
||||
else
|
||||
h = pwd->pw_dir;
|
||||
}
|
||||
|
||||
/* Copy the home directory */
|
||||
for (b = patbuf; b < eb && *h; *b++ = *h++)
|
||||
continue;
|
||||
|
||||
/* Append the rest of the pattern */
|
||||
while (b < eb && (*b++ = *p++) != EOS)
|
||||
continue;
|
||||
*b = EOS;
|
||||
|
||||
return patbuf;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* The main glob() routine: compiles the pattern (optionally processing
|
||||
* quotes), calls glob1() to do the real pattern matching, and finally
|
||||
* sorts the list (unless unsorted operation is requested). Returns 0
|
||||
* if things went well, nonzero if errors occurred.
|
||||
*/
|
||||
static int
|
||||
glob0(const Char *pattern, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
const Char *qpatnext;
|
||||
int err;
|
||||
size_t oldpathc;
|
||||
Char *bufnext, c, patbuf[MAXPATHLEN];
|
||||
|
||||
qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
|
||||
oldpathc = pglob->gl_pathc;
|
||||
bufnext = patbuf;
|
||||
|
||||
/* We don't need to check for buffer overflow any more. */
|
||||
while ((c = *qpatnext++) != EOS) {
|
||||
switch (c) {
|
||||
case LBRACKET:
|
||||
c = *qpatnext;
|
||||
if (c == NOT)
|
||||
++qpatnext;
|
||||
if (*qpatnext == EOS ||
|
||||
g_strchr(qpatnext+1, RBRACKET) == NULL) {
|
||||
*bufnext++ = LBRACKET;
|
||||
if (c == NOT)
|
||||
--qpatnext;
|
||||
break;
|
||||
}
|
||||
*bufnext++ = M_SET;
|
||||
if (c == NOT)
|
||||
*bufnext++ = M_NOT;
|
||||
c = *qpatnext++;
|
||||
do {
|
||||
*bufnext++ = CHAR(c);
|
||||
if (*qpatnext == RANGE &&
|
||||
(c = qpatnext[1]) != RBRACKET) {
|
||||
*bufnext++ = M_RNG;
|
||||
*bufnext++ = CHAR(c);
|
||||
qpatnext += 2;
|
||||
}
|
||||
} while ((c = *qpatnext++) != RBRACKET);
|
||||
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||
*bufnext++ = M_END;
|
||||
break;
|
||||
case QUESTION:
|
||||
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||
*bufnext++ = M_ONE;
|
||||
break;
|
||||
case STAR:
|
||||
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||
/* collapse adjacent stars to one,
|
||||
* to avoid exponential behavior
|
||||
*/
|
||||
if (bufnext == patbuf || bufnext[-1] != M_ALL)
|
||||
*bufnext++ = M_ALL;
|
||||
break;
|
||||
default:
|
||||
*bufnext++ = CHAR(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
*bufnext = EOS;
|
||||
#ifdef DEBUG
|
||||
qprintf("glob0:", patbuf);
|
||||
#endif
|
||||
|
||||
if ((err = glob1(patbuf, pglob, limit)) != 0)
|
||||
return(err);
|
||||
|
||||
/*
|
||||
* If there was no match we are going to append the pattern
|
||||
* if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
|
||||
* and the pattern did not contain any magic characters
|
||||
* GLOB_NOMAGIC is there just for compatibility with csh.
|
||||
*/
|
||||
if (pglob->gl_pathc == oldpathc) {
|
||||
if (((pglob->gl_flags & GLOB_NOCHECK) ||
|
||||
((pglob->gl_flags & GLOB_NOMAGIC) &&
|
||||
!(pglob->gl_flags & GLOB_MAGCHAR))))
|
||||
return(globextend(pattern, pglob, limit));
|
||||
else
|
||||
return(GLOB_NOMATCH);
|
||||
}
|
||||
if (!(pglob->gl_flags & GLOB_NOSORT))
|
||||
qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
|
||||
pglob->gl_pathc - oldpathc, sizeof(char *), compare);
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
 * qsort() comparator for the result vector: both arguments point at
 * `char *` elements, which are ordered with strcmp().
 */
static int
compare(const void *p, const void *q)
{
    const char *lhs = *(char **)p;
    const char *rhs = *(char **)q;

    return (strcmp(lhs, rhs));
}
|
||||
|
||||
static int
|
||||
glob1(Char *pattern, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
Char pathbuf[MAXPATHLEN];
|
||||
|
||||
/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
|
||||
if (*pattern == EOS)
|
||||
return(0);
|
||||
return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
|
||||
pattern, pglob, limit));
|
||||
}
|
||||
|
||||
/*
|
||||
* The functions glob2 and glob3 are mutually recursive; there is one level
|
||||
* of recursion for each segment in the pattern that contains one or more
|
||||
* meta characters.
|
||||
*/
|
||||
static int
|
||||
glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
|
||||
glob_t *pglob, size_t *limit)
|
||||
{
|
||||
struct stat sb;
|
||||
Char *p, *q;
|
||||
int anymeta;
|
||||
|
||||
/*
|
||||
* Loop over pattern segments until end of pattern or until
|
||||
* segment with meta character found.
|
||||
*/
|
||||
for (anymeta = 0;;) {
|
||||
if (*pattern == EOS) { /* End of pattern? */
|
||||
*pathend = EOS;
|
||||
if (g_lstat(pathbuf, &sb, pglob))
|
||||
return(0);
|
||||
|
||||
if (((pglob->gl_flags & GLOB_MARK) &&
|
||||
pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
|
||||
|| (S_ISLNK(sb.st_mode) &&
|
||||
(g_stat(pathbuf, &sb, pglob) == 0) &&
|
||||
S_ISDIR(sb.st_mode)))) {
|
||||
if (pathend + 1 > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*pathend++ = SEP;
|
||||
*pathend = EOS;
|
||||
}
|
||||
++pglob->gl_matchc;
|
||||
return(globextend(pathbuf, pglob, limit));
|
||||
}
|
||||
|
||||
/* Find end of next segment, copy tentatively to pathend. */
|
||||
q = pathend;
|
||||
p = pattern;
|
||||
while (*p != EOS && *p != SEP) {
|
||||
if (ismeta(*p))
|
||||
anymeta = 1;
|
||||
if (q + 1 > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*q++ = *p++;
|
||||
}
|
||||
|
||||
if (!anymeta) { /* No expansion, do next segment. */
|
||||
pathend = q;
|
||||
pattern = p;
|
||||
while (*pattern == SEP) {
|
||||
if (pathend + 1 > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*pathend++ = *pattern++;
|
||||
}
|
||||
} else /* Need expansion, recurse. */
|
||||
return(glob3(pathbuf, pathend, pathend_last, pattern, p,
|
||||
pglob, limit));
|
||||
}
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
static int
|
||||
glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
|
||||
Char *pattern, Char *restpattern,
|
||||
glob_t *pglob, size_t *limit)
|
||||
{
|
||||
struct dirent *dp;
|
||||
DIR *dirp;
|
||||
int err;
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
/*
|
||||
* The readdirfunc declaration can't be prototyped, because it is
|
||||
* assigned, below, to two functions which are prototyped in glob.h
|
||||
* and dirent.h as taking pointers to differently typed opaque
|
||||
* structures.
|
||||
*/
|
||||
struct dirent *(*readdirfunc)();
|
||||
|
||||
if (pathend > pathend_last)
|
||||
return (GLOB_ABORTED);
|
||||
*pathend = EOS;
|
||||
errno = 0;
|
||||
|
||||
if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
|
||||
/* TODO: don't call for ENOENT or ENOTDIR? */
|
||||
if (pglob->gl_errfunc) {
|
||||
if (g_Ctoc(pathbuf, buf, sizeof(buf)))
|
||||
return (GLOB_ABORTED);
|
||||
if (pglob->gl_errfunc(buf, errno) ||
|
||||
pglob->gl_flags & GLOB_ERR)
|
||||
return (GLOB_ABORTED);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
err = 0;
|
||||
|
||||
/* Search directory for matching names. */
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
readdirfunc = pglob->gl_readdir;
|
||||
else
|
||||
readdirfunc = readdir;
|
||||
while ((dp = (*readdirfunc)(dirp))) {
|
||||
char *sc;
|
||||
Char *dc;
|
||||
wchar_t wc;
|
||||
size_t clen;
|
||||
mbstate_t mbs;
|
||||
|
||||
/* Initial DOT must be matched literally. */
|
||||
if (dp->d_name[0] == DOT && *pattern != DOT)
|
||||
continue;
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
dc = pathend;
|
||||
sc = dp->d_name;
|
||||
while (dc < pathend_last) {
|
||||
clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
|
||||
if (clen == (size_t)-1 || clen == (size_t)-2) {
|
||||
wc = *sc;
|
||||
clen = 1;
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
}
|
||||
if ((*dc++ = wc) == EOS)
|
||||
break;
|
||||
sc += clen;
|
||||
}
|
||||
if (!match(pathend, pattern, restpattern)) {
|
||||
*pathend = EOS;
|
||||
continue;
|
||||
}
|
||||
err = glob2(pathbuf, --dc, pathend_last, restpattern,
|
||||
pglob, limit);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
(*pglob->gl_closedir)(dirp);
|
||||
else
|
||||
closedir(dirp);
|
||||
return(err);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Extend the gl_pathv member of a glob_t structure to accomodate a new item,
|
||||
* add the new item, and update gl_pathc.
|
||||
*
|
||||
* This assumes the BSD realloc, which only copies the block when its size
|
||||
* crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
|
||||
* behavior.
|
||||
*
|
||||
* Return 0 if new item added, error code if memory couldn't be allocated.
|
||||
*
|
||||
* Invariant of the glob_t structure:
|
||||
* Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
|
||||
* gl_pathv points to (gl_offs + gl_pathc + 1) items.
|
||||
*/
|
||||
static int
|
||||
globextend(const Char *path, glob_t *pglob, size_t *limit)
|
||||
{
|
||||
char **pathv;
|
||||
size_t i, newsize, len;
|
||||
char *copy;
|
||||
const Char *p;
|
||||
|
||||
if (*limit && pglob->gl_pathc > *limit) {
|
||||
errno = 0;
|
||||
return (GLOB_NOSPACE);
|
||||
}
|
||||
|
||||
newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
|
||||
pathv = pglob->gl_pathv ?
|
||||
realloc((char *)pglob->gl_pathv, newsize) :
|
||||
malloc(newsize);
|
||||
if (pathv == NULL) {
|
||||
if (pglob->gl_pathv) {
|
||||
free(pglob->gl_pathv);
|
||||
pglob->gl_pathv = NULL;
|
||||
}
|
||||
return(GLOB_NOSPACE);
|
||||
}
|
||||
|
||||
if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
|
||||
/* first time around -- clear initial gl_offs items */
|
||||
pathv += pglob->gl_offs;
|
||||
for (i = pglob->gl_offs + 1; --i > 0; )
|
||||
*--pathv = NULL;
|
||||
}
|
||||
pglob->gl_pathv = pathv;
|
||||
|
||||
for (p = path; *p++;)
|
||||
continue;
|
||||
len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
|
||||
if ((copy = malloc(len)) != NULL) {
|
||||
if (g_Ctoc(path, copy, len)) {
|
||||
free(copy);
|
||||
return (GLOB_NOSPACE);
|
||||
}
|
||||
pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
|
||||
}
|
||||
pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
|
||||
return(copy == NULL ? GLOB_NOSPACE : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* pattern matching function for filenames. Each occurrence of the *
|
||||
* pattern causes a recursion level.
|
||||
*/
|
||||
static int
|
||||
match(Char *name, Char *pat, Char *patend)
|
||||
{
|
||||
int ok, negate_range;
|
||||
Char c, k;
|
||||
|
||||
while (pat < patend) {
|
||||
c = *pat++;
|
||||
switch (c & M_MASK) {
|
||||
case M_ALL:
|
||||
if (pat == patend)
|
||||
return(1);
|
||||
do
|
||||
if (match(name, pat, patend))
|
||||
return(1);
|
||||
while (*name++ != EOS);
|
||||
return(0);
|
||||
case M_ONE:
|
||||
if (*name++ == EOS)
|
||||
return(0);
|
||||
break;
|
||||
case M_SET:
|
||||
ok = 0;
|
||||
if ((k = *name++) == EOS)
|
||||
return(0);
|
||||
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
|
||||
++pat;
|
||||
while (((c = *pat++) & M_MASK) != M_END)
|
||||
if ((*pat & M_MASK) == M_RNG) {
|
||||
if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])) ok = 1;
|
||||
pat += 2;
|
||||
} else if (c == k)
|
||||
ok = 1;
|
||||
if (ok == negate_range)
|
||||
return(0);
|
||||
break;
|
||||
default:
|
||||
if (*name++ != c)
|
||||
return(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return(*name == EOS);
|
||||
}
|
||||
|
||||
/* Free allocated data belonging to a glob_t structure. */
|
||||
void
|
||||
globfree(glob_t *pglob)
|
||||
{
|
||||
size_t i;
|
||||
char **pp;
|
||||
|
||||
if (pglob->gl_pathv != NULL) {
|
||||
pp = pglob->gl_pathv + pglob->gl_offs;
|
||||
for (i = pglob->gl_pathc; i--; ++pp)
|
||||
if (*pp)
|
||||
free(*pp);
|
||||
free(pglob->gl_pathv);
|
||||
pglob->gl_pathv = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static DIR *
|
||||
g_opendir(Char *str, glob_t *pglob)
|
||||
{
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
if (!*str)
|
||||
strcpy(buf, ".");
|
||||
else {
|
||||
if (g_Ctoc(str, buf, sizeof(buf)))
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
return((*pglob->gl_opendir)(buf));
|
||||
|
||||
return(opendir(buf));
|
||||
}
|
||||
|
||||
static int
|
||||
g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
|
||||
{
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
if (g_Ctoc(fn, buf, sizeof(buf))) {
|
||||
errno = ENAMETOOLONG;
|
||||
return (-1);
|
||||
}
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
return((*pglob->gl_lstat)(buf, sb));
|
||||
return(lstat(buf, sb));
|
||||
}
|
||||
|
||||
static int
|
||||
g_stat(Char *fn, struct stat *sb, glob_t *pglob)
|
||||
{
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
if (g_Ctoc(fn, buf, sizeof(buf))) {
|
||||
errno = ENAMETOOLONG;
|
||||
return (-1);
|
||||
}
|
||||
if (pglob->gl_flags & GLOB_ALTDIRFUNC)
|
||||
return((*pglob->gl_stat)(buf, sb));
|
||||
return(stat(buf, sb));
|
||||
}
|
||||
|
||||
static const Char *
|
||||
g_strchr(const Char *str, wchar_t ch)
|
||||
{
|
||||
|
||||
do {
|
||||
if (*str == ch)
|
||||
return (str);
|
||||
} while (*str++);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
g_Ctoc(const Char *str, char *buf, size_t len)
|
||||
{
|
||||
mbstate_t mbs;
|
||||
size_t clen;
|
||||
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
while (len >= MB_CUR_MAX) {
|
||||
clen = wcrtomb(buf, *str, &mbs);
|
||||
if (clen == (size_t)-1)
|
||||
return (1);
|
||||
if (*str == L'\0')
|
||||
return (0);
|
||||
str++;
|
||||
buf += clen;
|
||||
len -= clen;
|
||||
}
|
||||
return (1);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
/*
 * Debug dump of a compiled pattern: the label, then the plain characters,
 * then a row marking protected characters with '"', then a row marking meta
 * characters with '_'.
 */
static void
qprintf(const char *str, Char *s)
{
    Char *cur;

    (void)printf("%s:\n", str);

    cur = s;
    while (*cur) {
        (void)printf("%c", CHAR(*cur));
        cur++;
    }
    (void)printf("\n");

    cur = s;
    while (*cur) {
        (void)printf("%c", *cur & M_PROTECT ? '"' : ' ');
        cur++;
    }
    (void)printf("\n");

    cur = s;
    while (*cur) {
        (void)printf("%c", ismeta(*cur) ? '_' : ' ');
        cur++;
    }
    (void)printf("\n");
}
#endif
|
||||
@@ -1,98 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Guido van Rossum.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)glob.h 8.1 (Berkeley) 6/2/93
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _GLOB_H_
|
||||
#define _GLOB_H_
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <glob.h>
|
||||
|
||||
/*#ifndef _SIZE_T_DECLARED
|
||||
typedef __size_t size_t;
|
||||
#define _SIZE_T_DECLARED
|
||||
#endif*/
|
||||
|
||||
struct stat;
|
||||
typedef struct {
|
||||
size_t gl_pathc; /* Count of total paths so far. */
|
||||
size_t gl_matchc; /* Count of paths matching pattern. */
|
||||
size_t gl_offs; /* Reserved at beginning of gl_pathv. */
|
||||
int gl_flags; /* Copy of flags parameter to glob. */
|
||||
char **gl_pathv; /* List of paths matching pattern. */
|
||||
/* Copy of errfunc parameter to glob. */
|
||||
int (*gl_errfunc)(const char *, int);
|
||||
|
||||
/*
|
||||
* Alternate filesystem access methods for glob; replacement
|
||||
* versions of closedir(3), readdir(3), opendir(3), stat(2)
|
||||
* and lstat(2).
|
||||
*/
|
||||
void (*gl_closedir)(void *);
|
||||
struct dirent *(*gl_readdir)(void *);
|
||||
void *(*gl_opendir)(const char *);
|
||||
int (*gl_lstat)(const char *, struct stat *);
|
||||
int (*gl_stat)(const char *, struct stat *);
|
||||
} glob_t;
|
||||
|
||||
/* Believed to have been introduced in 1003.2-1992 */
|
||||
#define GLOB_APPEND 0x0001 /* Append to output from previous call. */
|
||||
#define GLOB_DOOFFS 0x0002 /* Prepend `gl_offs` null pointers (leaving space for exec, say). */
|
||||
#define GLOB_ERR 0x0004 /* Return on error. */
|
||||
#define GLOB_MARK 0x0008 /* Append "/" to the names of returned directories. */
|
||||
#define GLOB_NOCHECK 0x0010 /* Return pattern itself if nothing matches. */
|
||||
#define GLOB_NOSORT 0x0020 /* Don't sort. */
|
||||
#define GLOB_NOESCAPE 0x2000 /* Disable backslash escaping. */
|
||||
|
||||
/* Error values returned by glob(3) */
|
||||
#define GLOB_NOSPACE (-1) /* Malloc call failed. */
|
||||
#define GLOB_ABORTED (-2) /* Unignored error. */
|
||||
#define GLOB_NOMATCH (-3) /* No match and GLOB_NOCHECK was not set. */
|
||||
|
||||
#define GLOB_ALTDIRFUNC 0x0040 /* Use alternately specified directory funcs. */
|
||||
#define GLOB_BRACE 0x0080 /* Expand braces like csh. */
|
||||
#define GLOB_MAGCHAR 0x0100 /* Set in `gl_flags` if the pattern had globbing characters. */
|
||||
#define GLOB_NOMAGIC 0x0200 /* GLOB_NOCHECK without magic chars (csh). */
|
||||
#define GLOB_QUOTE 0x0400 /* Quote special chars with \. */
|
||||
#define GLOB_TILDE 0x0800 /* Expand tilde names from the passwd file. */
|
||||
#define GLOB_LIMIT 0x1000 /* limit number of returned paths */
|
||||
|
||||
__BEGIN_DECLS
|
||||
int glob(const char *, int, int (*)(const char *, int), glob_t *);
|
||||
void globfree(glob_t *);
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_GLOB_H_ */
|
||||
Submodule app/src/main/cpp/icu4c/android deleted from 4574d1dddf
@@ -1,55 +0,0 @@
|
||||
{
|
||||
"strategy": "subtractive",
|
||||
"featureFilters": {
|
||||
"coll_ucadata": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"coll_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"confusables": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"curr_supplemental": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"curr_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"locales_tree": {
|
||||
"excludelist": [
|
||||
"en_US_POSIX"
|
||||
]
|
||||
},
|
||||
"misc": {
|
||||
"excludelist": [
|
||||
"currencyNumericCodes",
|
||||
"genderList"
|
||||
]
|
||||
},
|
||||
"region_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"rbnf_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"stringprep": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"translit": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"unames": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"unit_tree": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"zone_supplemental": {
|
||||
"filterType": "exclude"
|
||||
},
|
||||
"zone_tree": {
|
||||
"filterType": "exclude"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,106 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2021 Patrick Goldinger
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Build script for ICU4C, tailored exactly for FlorisBoard's needs.
|
||||
|
||||
# Before executing this script to manually rebuild the ICU libraries, make sure to execute
|
||||
# git submodule update --init --recursive
|
||||
# else the script won't find the ICU source files!
|
||||
|
||||
###### Build ICU4C ######
|
||||
|
||||
./android/cc-icu4c.sh build \
|
||||
--arch=arm,arm64 \
|
||||
--api=23 \
|
||||
--library-type=static \
|
||||
--build-dir=./build \
|
||||
--icu-src-dir=./android/icu/icu4c \
|
||||
--install-include-dir=./include \
|
||||
--install-libs-dir=./../../jniLibs \
|
||||
--data-filter-file=./data-feature-filter.json \
|
||||
--enable-collation=no \
|
||||
--enable-formatting=no \
|
||||
--enable-legacy-converters=yes \
|
||||
--enable-regex=no \
|
||||
--enable-transliteration=no
|
||||
|
||||
###### Clean up unused header files in include/unicode header dir ######
|
||||
|
||||
readonly SEP=":"
|
||||
readonly NUSPELL_DIR=$(realpath ../nuspell)
|
||||
readonly UNICODE_DIR=$(realpath include/unicode)
|
||||
|
||||
scan_file() {
|
||||
file=$1
|
||||
local -n var=$2
|
||||
#echo "Scanning '$file'..."
|
||||
while IFS= read -r line; do
|
||||
case $line in
|
||||
*"#include <unicode/"*)
|
||||
# shellcheck disable=SC2001
|
||||
header=$(sed -e 's/.*<unicode\/\(.*\)>.*/\1/' <<< "$line")
|
||||
;;
|
||||
*"#include \"unicode/"*)
|
||||
# shellcheck disable=SC2001
|
||||
header=$(sed -e 's/.*\"unicode\/\(.*\)\".*/\1/' <<< "$line")
|
||||
;;
|
||||
*)
|
||||
header=""
|
||||
;;
|
||||
esac
|
||||
if [ -z "$header" ]; then
|
||||
continue
|
||||
fi
|
||||
# shellcheck disable=SC2091
|
||||
# shellcheck disable=SC2086
|
||||
if [[ ! "$var" == *"$header"* ]]; then
|
||||
# shellcheck disable=SC2140
|
||||
var+="$SEP$header"
|
||||
fi
|
||||
done < "$file"
|
||||
}
|
||||
|
||||
req_headers=""
|
||||
|
||||
for nsrcfile in "$NUSPELL_DIR"/*; do
|
||||
scan_file "$nsrcfile" "req_headers"
|
||||
done
|
||||
|
||||
if [ -n "$req_headers" ]; then
|
||||
req_headers=${req_headers:1}
|
||||
fi
|
||||
|
||||
while true; do
|
||||
old_req_headers=$req_headers
|
||||
IFS="$SEP" read -ra req_header_splitted <<< "$req_headers"
|
||||
for req_header in "${req_header_splitted[@]}"; do
|
||||
scan_file "$UNICODE_DIR/$req_header" "req_headers"
|
||||
done
|
||||
[ ! $req_headers = $old_req_headers ] || break
|
||||
done
|
||||
|
||||
#echo "$req_headers"
|
||||
|
||||
for headerfile in "$UNICODE_DIR"/*; do
|
||||
header=$(basename "$headerfile")
|
||||
if [[ "$req_headers" == *"$header"* ]]; then
|
||||
echo "KEEP '$headerfile'"
|
||||
else
|
||||
echo "DELETE '$headerfile'"
|
||||
rm "$headerfile"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -1,670 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*
|
||||
* File brkiter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
|
||||
* 05/07/97 aliu Fixed DLL declaration.
|
||||
* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
|
||||
* 08/11/98 helena Sync-up JDK1.2.
|
||||
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef BRKITER_H
|
||||
#define BRKITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Break Iterator.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Allow the declaration of APIs with pointers to BreakIterator
|
||||
* even when break iteration is removed from the build.
|
||||
*/
|
||||
class BreakIterator;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#else
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/umisc.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* The BreakIterator class implements methods for finding the location
|
||||
* of boundaries in text. BreakIterator is an abstract base class.
|
||||
* Instances of BreakIterator maintain a current position and scan over
|
||||
* text returning the index of characters where boundaries occur.
|
||||
* <p>
|
||||
* Line boundary analysis determines where a text string can be broken
|
||||
* when line-wrapping. The mechanism correctly handles punctuation and
|
||||
* hyphenated words.
|
||||
* <p>
|
||||
* Sentence boundary analysis allows selection with correct
|
||||
* interpretation of periods within numbers and abbreviations, and
|
||||
* trailing punctuation marks such as quotation marks and parentheses.
|
||||
* <p>
|
||||
* Word boundary analysis is used by search and replace functions, as
|
||||
* well as within text editing applications that allow the user to
|
||||
* select words with a double click. Word selection provides correct
|
||||
* interpretation of punctuation marks within and following
|
||||
* words. Characters that are not part of a word, such as symbols or
|
||||
* punctuation marks, have word-breaks on both sides.
|
||||
* <p>
|
||||
* Character boundary analysis allows users to interact with
|
||||
* characters as they expect to, for example, when moving the cursor
|
||||
* through a text string. Character boundary analysis provides correct
|
||||
* navigation through character strings, regardless of how the
|
||||
* character is stored. For example, an accented character might be
|
||||
* stored as a base character and a diacritical mark. What users
|
||||
* consider to be a character can differ between languages.
|
||||
* <p>
|
||||
* The text boundary positions are found according to the rules
|
||||
* described in Unicode Standard Annex #29, Text Boundaries, and
|
||||
* Unicode Standard Annex #14, Line Breaking Properties. These
|
||||
* are available at http://www.unicode.org/reports/tr14/ and
|
||||
* http://www.unicode.org/reports/tr29/.
|
||||
* <p>
|
||||
* In addition to the C++ API defined in this header file, a
|
||||
* plain C API with equivalent functionality is defined in the
|
||||
* file ubrk.h
|
||||
* <p>
|
||||
* Code snippets illustrating the use of the Break Iterator APIs
|
||||
* are available in the ICU User Guide,
|
||||
* http://icu-project.org/userguide/boundaryAnalysis.html
|
||||
* and in the sample program icu/source/samples/break/break.cpp
|
||||
*
|
||||
*/
|
||||
class U_COMMON_API BreakIterator : public UObject {
|
||||
public:
|
||||
/**
|
||||
* destructor
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~BreakIterator();
|
||||
|
||||
/**
|
||||
* Return true if another object is semantically equal to this
|
||||
* one. The other object should be an instance of the same subclass of
|
||||
* BreakIterator. Objects of different subclasses are considered
|
||||
* unequal.
|
||||
* <P>
|
||||
* Return true if this BreakIterator is at the same position in the
|
||||
* same text, and is the same class and type (word, line, etc.) of
|
||||
* BreakIterator, as the argument. Text is considered the same if
|
||||
* it contains the same characters, it need not be the same
|
||||
* object, and styles are not considered.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool operator==(const BreakIterator&) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the complement of the result of operator==
|
||||
* @param rhs The BreakIterator to be compared for inequality
|
||||
* @return the complement of the result of operator==
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
|
||||
|
||||
/**
|
||||
* Return a polymorphic copy of this object. This is an abstract
|
||||
* method which subclasses implement.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual BreakIterator* clone() const = 0;
|
||||
|
||||
/**
|
||||
* Return a polymorphic class ID for this object. Different subclasses
|
||||
* will return distinct unequal values.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UClassID getDynamicClassID(void) const = 0;
|
||||
|
||||
/**
|
||||
* Return a CharacterIterator over the text being analyzed.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual CharacterIterator& getText(void) const = 0;
|
||||
|
||||
|
||||
/**
|
||||
* Get a UText for the text being analyzed.
|
||||
* The returned UText is a shallow clone of the UText used internally
|
||||
* by the break iterator implementation. It can safely be used to
|
||||
* access the text without impacting any break iterator operations,
|
||||
* but the underlying text itself must not be altered.
|
||||
*
|
||||
* @param fillIn A UText to be filled in. If NULL, a new UText will be
|
||||
* allocated to hold the result.
|
||||
* @param status receives any error codes.
|
||||
* @return The current UText for this break iterator. If an input
|
||||
* UText was provided, it will always be returned.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
|
||||
|
||||
/**
|
||||
* Change the text over which this operates. The text boundary is
|
||||
* reset to the start.
|
||||
*
|
||||
* The BreakIterator will retain a reference to the supplied string.
|
||||
* The caller must not modify or delete the text while the BreakIterator
|
||||
* retains the reference.
|
||||
*
|
||||
* @param text The UnicodeString used to change the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void setText(const UnicodeString &text) = 0;
|
||||
|
||||
/**
|
||||
* Reset the break iterator to operate over the text represented by
|
||||
* the UText. The iterator position is reset to the start.
|
||||
*
|
||||
* This function makes a shallow clone of the supplied UText. This means
|
||||
* that the caller is free to immediately close or otherwise reuse the
|
||||
* UText that was passed as a parameter, but that the underlying text itself
|
||||
* must not be altered while being referenced by the break iterator.
|
||||
*
|
||||
* All index positions returned by break iterator functions are
|
||||
* native indices from the UText. For example, when breaking UTF-8
|
||||
* encoded text, the break positions returned by next(), previous(), etc.
|
||||
* will be UTF-8 string indices, not UTF-16 positions.
|
||||
*
|
||||
* @param text The UText used to change the text.
|
||||
* @param status receives any error codes.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
virtual void setText(UText *text, UErrorCode &status) = 0;
|
||||
|
||||
/**
|
||||
* Change the text over which this operates. The text boundary is
|
||||
* reset to the start.
|
||||
* Note that setText(UText *) provides similar functionality to this function,
|
||||
* and is more efficient.
|
||||
* @param it The CharacterIterator used to change the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void adoptText(CharacterIterator* it) = 0;
|
||||
|
||||
enum {
|
||||
/**
|
||||
* DONE is returned by previous() and next() after all valid
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
DONE = (int32_t)-1
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets the current iteration position to the beginning of the text, position zero.
|
||||
* @return The offset of the beginning of the text, zero.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t first(void) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
|
||||
* @return The index immediately BEYOND the last character in the text being scanned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t last(void) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the boundary preceding the current boundary.
|
||||
* @return The character index of the previous text boundary or DONE if all
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t previous(void) = 0;
|
||||
|
||||
/**
|
||||
* Advance the iterator to the boundary following the current boundary.
|
||||
* @return The character index of the next text boundary or DONE if all
|
||||
* boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next(void) = 0;
|
||||
|
||||
/**
|
||||
* Return character index of the current iterator position within the text.
|
||||
* @return The boundary most recently returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t current(void) const = 0;
|
||||
|
||||
/**
|
||||
* Advance the iterator to the first boundary following the specified offset.
|
||||
* The value returned is always greater than the offset or
|
||||
* the value BreakIterator.DONE
|
||||
* @param offset the offset to begin scanning.
|
||||
* @return The first boundary after the specified offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t following(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the first boundary preceding the specified offset.
|
||||
* The value returned is always smaller than the offset or
|
||||
* the value BreakIterator.DONE
|
||||
* @param offset the offset to begin scanning.
|
||||
* @return The first boundary before the specified offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t preceding(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Return true if the specified position is a boundary position.
|
||||
* As a side effect, the current position of the iterator is set
|
||||
* to the first boundary position at or following the specified offset.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UBool isBoundary(int32_t offset) = 0;
|
||||
|
||||
/**
|
||||
* Set the iterator position to the nth boundary from the current boundary
|
||||
* @param n the number of boundaries to move by. A value of 0
|
||||
* does nothing. Negative values move to previous boundaries
|
||||
* and positive values move to later boundaries.
|
||||
* @return The new iterator position, or
|
||||
* DONE if there are fewer than |n| boundaries in the specified direction.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual int32_t next(int32_t n) = 0;
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, return the status tag from the break rule
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support a rule status,
|
||||
* a default value of 0 is returned.
|
||||
* <p>
|
||||
* @return the status from the break rule that determined the boundary at
|
||||
* the current iteration position.
|
||||
* @see RuleBasedBreakIterator::getRuleStatus()
|
||||
* @see UWordBreak
|
||||
* @stable ICU 52
|
||||
*/
|
||||
virtual int32_t getRuleStatus() const;
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support rule status,
|
||||
* no values are returned.
|
||||
* <p>
|
||||
* The returned status value(s) are stored into an array provided by the caller.
|
||||
* The values are stored in sorted (ascending) order.
|
||||
* If the capacity of the output array is insufficient to hold the data,
|
||||
* the output will be truncated to the available length, and a
|
||||
* U_BUFFER_OVERFLOW_ERROR will be signaled.
|
||||
* <p>
|
||||
* @see RuleBasedBreakIterator::getRuleStatusVec
|
||||
*
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the boundary at the current iteration position.
|
||||
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
|
||||
* is the total number of status values that were available,
|
||||
* not the reduced number that were actually returned.
|
||||
* @see getRuleStatus
|
||||
* @stable ICU 52
|
||||
*/
|
||||
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for word-breaks using the given locale.
|
||||
* Returns an instance of a BreakIterator implementing word breaks.
|
||||
* WordBreak is useful for word selection (ex. double click)
|
||||
* @param where the locale.
|
||||
* @param status the error code
|
||||
* @return A BreakIterator for word-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createWordInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for line-breaks using specified locale.
|
||||
* Returns an instance of a BreakIterator implementing line breaks. Line
|
||||
* breaks are logically possible line breaks, actual line breaks are
|
||||
* usually determined based on display width.
|
||||
* LineBreak is useful for word wrapping text.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for line-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createLineInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for character-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing character breaks.
|
||||
* Character breaks are boundaries of combining character sequences.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for character-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createCharacterInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for sentence-breaks using specified locale
|
||||
* Returns an instance of a BreakIterator implementing sentence breaks.
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for sentence-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createSentenceInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Create BreakIterator for title-casing breaks using the specified locale
|
||||
* Returns an instance of a BreakIterator implementing title breaks.
|
||||
* The iterator returned locates title boundaries as described for
|
||||
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
|
||||
* please use a word boundary iterator. See {@link #createWordInstance }.
|
||||
*
|
||||
* @param where the locale.
|
||||
* @param status The error code.
|
||||
* @return A BreakIterator for title-breaks. The UErrorCode& status
|
||||
* parameter is used to return status information to the user.
|
||||
* To check whether the construction succeeded or not, you should check
|
||||
* the value of U_SUCCESS(err). If you wish more detailed information, you
|
||||
* can check for informational error results which still indicate success.
|
||||
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
|
||||
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
|
||||
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
|
||||
* used; neither the requested locale nor any of its fall back locales
|
||||
* could be found.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @deprecated ICU 64 Use createWordInstance instead.
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createTitleInstance(const Locale& where, UErrorCode& status);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Get the set of Locales for which TextBoundaries are installed.
|
||||
* <p><b>Note:</b> this will not return locales added through the register
|
||||
* call. To see the registered locales too, use the getAvailableLocales
|
||||
* function that returns a StringEnumeration object </p>
|
||||
* @param count the output parameter of number of elements in the locale list
|
||||
* @return available locales
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
|
||||
|
||||
/**
|
||||
* Get name of the object for the desired Locale, in the desired language.
|
||||
* @param objectLocale must be from getAvailableLocales.
|
||||
* @param displayLocale specifies the desired locale for output.
|
||||
* @param name the fill-in parameter of the return value
|
||||
* Uses best match.
|
||||
* @return user-displayable name
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
|
||||
const Locale& displayLocale,
|
||||
UnicodeString& name);
|
||||
|
||||
/**
|
||||
* Get name of the object for the desired Locale, in the language of the
|
||||
* default locale.
|
||||
* @param objectLocale must be from getAvailableLocales
|
||||
* @param name the fill-in parameter of the return value
|
||||
* @return user-displayable name
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
|
||||
UnicodeString& name);
|
||||
|
||||
#ifndef U_FORCE_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Deprecated functionality. Use clone() instead.
|
||||
*
|
||||
* Thread safe client-buffer-based cloning operation
|
||||
* Do NOT call delete on a safeclone, since 'new' is not used to create it.
|
||||
* @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
|
||||
* If buffer is not large enough, new memory will be allocated.
|
||||
* @param BufferSize reference to size of allocated space.
|
||||
* If BufferSize == 0, a sufficient size for use in cloning will
|
||||
* be returned ('pre-flighting')
|
||||
* If BufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
|
||||
* necessary.
|
||||
* @return pointer to the new clone
|
||||
*
|
||||
* @deprecated ICU 52. Use clone() instead.
|
||||
*/
|
||||
virtual BreakIterator * createBufferClone(void *stackBuffer,
|
||||
int32_t &BufferSize,
|
||||
UErrorCode &status) = 0;
|
||||
#endif // U_FORCE_HIDE_DEPRECATED_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Determine whether the BreakIterator was created in user memory by
|
||||
* createBufferClone(), and thus should not be deleted. Such objects
|
||||
* must be closed by an explicit call to the destructor (not delete).
|
||||
* @deprecated ICU 52. Always delete the BreakIterator.
|
||||
*/
|
||||
inline UBool isBufferClone(void);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
/**
|
||||
* Register a new break iterator of the indicated kind, to use in the given locale.
|
||||
* The break iterator will be adopted. Clones of the iterator will be returned
|
||||
* if a request for a break iterator of the given kind matches or falls back to
|
||||
* this locale.
|
||||
* Because ICU may choose to cache BreakIterators internally, this must
|
||||
* be called at application startup, prior to any calls to
|
||||
* BreakIterator::createXXXInstance to avoid undefined behavior.
|
||||
* @param toAdopt the BreakIterator instance to be adopted
|
||||
* @param locale the Locale for which this instance is to be registered
|
||||
* @param kind the type of iterator for which this instance is to be registered
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @return a registry key that can be used to unregister this instance
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
|
||||
const Locale& locale,
|
||||
UBreakIteratorType kind,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Unregister a previously-registered BreakIterator using the key returned from the
|
||||
* register call. Key becomes invalid after a successful call and should not be used again.
|
||||
* The BreakIterator corresponding to the key will be deleted.
|
||||
* Because ICU may choose to cache BreakIterators internally, this should
|
||||
* be called during application shutdown, after all calls to
|
||||
* BreakIterator::createXXXInstance to avoid undefined behavior.
|
||||
* @param key the registry key returned by a previous call to registerInstance
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @return true if the iterator for the key was successfully unregistered
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Return a StringEnumeration over the locales available at the time of the call,
|
||||
* including registered locales.
|
||||
* @return a StringEnumeration over the locales available at the time of the call
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the locale for this break iterator. Two flavors are available: valid and
|
||||
* actual locale.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Get the locale for this break iterator object. You can choose between valid and actual locale.
|
||||
* @param type type of the locale we're looking for (valid or actual)
|
||||
* @param status error code for the operation
|
||||
* @return the locale
|
||||
* @internal
|
||||
*/
|
||||
const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the matching state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator implementation never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized,
|
||||
* system-level code. One example use case is with garbage collection that moves
|
||||
* the text in memory.
|
||||
*
|
||||
* @param input The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return *this
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
|
||||
|
||||
private:
|
||||
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
|
||||
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
|
||||
friend class ICUBreakIteratorFactory;
|
||||
friend class ICUBreakIteratorService;
|
||||
|
||||
protected:
|
||||
// Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
|
||||
// or else the compiler will create public ones.
|
||||
/** @internal */
|
||||
BreakIterator();
|
||||
/** @internal */
|
||||
BreakIterator (const BreakIterator &other);
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
BreakIterator (const Locale& valid, const Locale &actual);
|
||||
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
|
||||
BreakIterator &operator = (const BreakIterator &other);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
private:
|
||||
|
||||
/** @internal (private) */
|
||||
char actualLocale[ULOC_FULLNAME_CAPACITY];
|
||||
char validLocale[ULOC_FULLNAME_CAPACITY];
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
inline UBool BreakIterator::isBufferClone()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // BRKITER_H
|
||||
//eof
|
||||
@@ -1,307 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
// Copyright (C) 2009-2012, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2007 Google Inc. All Rights Reserved.
|
||||
// Author: sanjay@google.com (Sanjay Ghemawat)
|
||||
//
|
||||
// Abstract interface that consumes a sequence of bytes (ByteSink).
|
||||
//
|
||||
// Used so that we can write a single piece of code that can operate
|
||||
// on a variety of output string types.
|
||||
//
|
||||
// Various implementations of this interface are provided:
|
||||
// ByteSink:
|
||||
// CheckedArrayByteSink Write to a flat array, with bounds checking
|
||||
// StringByteSink Write to an STL string
|
||||
|
||||
// This code is a contribution of Google code, and the style used here is
|
||||
// a compromise between the original Google code and the ICU coding guidelines.
|
||||
// For example, data types are ICU-ified (size_t,int->int32_t),
|
||||
// and API comments doxygen-ified, but function names and behavior are
|
||||
// as in the original, if possible.
|
||||
// Assertion-style error handling, not available in ICU, was changed to
|
||||
// parameter "pinning" similar to UnicodeString.
|
||||
//
|
||||
// In addition, this is only a partial port of the original Google code,
|
||||
// limited to what was needed so far. The (nearly) complete original code
|
||||
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
|
||||
// (see ICU ticket 6765, r25517).
|
||||
|
||||
#ifndef __BYTESTREAM_H__
|
||||
#define __BYTESTREAM_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Interface for writing bytes, and implementation classes.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/std_string.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A ByteSink can be filled with bytes.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API ByteSink : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Default constructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
ByteSink() { }
|
||||
/**
|
||||
* Virtual destructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual ~ByteSink();
|
||||
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* bytes, int32_t n) = 0;
|
||||
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append().
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
* but const char8_t * in C++20.
|
||||
* If the compiler does support char8_t as a distinct type,
|
||||
* then an AppendU8() overload for that is defined and will be chosen.
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char* bytes, int32_t n) {
|
||||
Append(bytes, n);
|
||||
}
|
||||
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
* but const char8_t * in C++20.
|
||||
* If the compiler does support char8_t as a distinct type,
|
||||
* then this AppendU8() overload for that is defined and will be chosen.
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char8_t* bytes, int32_t n) {
|
||||
Append(reinterpret_cast<const char*>(bytes), n);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *result_capacity. Guarantees *result_capacity>=min_capacity.
|
||||
* May return a pointer to the caller-owned scratch buffer which must have
|
||||
* scratch_capacity>=min_capacity.
|
||||
* The returned buffer is only valid until the next operation
|
||||
* on this ByteSink.
|
||||
*
|
||||
* After writing at most *result_capacity bytes, call Append() with the
|
||||
* pointer returned from this function and the number of bytes written.
|
||||
* Many Append() implementations will avoid copying bytes if this function
|
||||
* returned an internal buffer.
|
||||
*
|
||||
* Partial usage example:
|
||||
* int32_t capacity;
|
||||
* char* buffer = sink->GetAppendBuffer(..., &capacity);
|
||||
* ... Write n bytes into buffer, with n <= capacity.
|
||||
* sink->Append(buffer, n);
|
||||
* In many implementations, that call to Append will avoid copying bytes.
|
||||
*
|
||||
* If the ByteSink allocates or reallocates an internal buffer, it should use
|
||||
* the desired_capacity_hint if appropriate.
|
||||
* If a caller cannot provide a reasonable guess at the desired capacity,
|
||||
* it should pass desired_capacity_hint=0.
|
||||
*
|
||||
* If a non-scratch buffer is returned, the caller may only pass
|
||||
* a prefix to it to Append().
|
||||
* That is, it is not correct to pass an interior pointer to Append().
|
||||
*
|
||||
* The default implementation always returns the scratch buffer.
|
||||
*
|
||||
* @param min_capacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desired_capacity_hint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratch_capacity capacity of the scratch buffer
|
||||
* @param result_capacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *result_capacity>=min_capacity
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual char* GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch, int32_t scratch_capacity,
|
||||
int32_t* result_capacity);
|
||||
|
||||
/**
|
||||
* Flush internal buffers.
|
||||
* Some byte sinks use internal buffers or provide buffering
|
||||
* and require calling Flush() at the end of the stream.
|
||||
* The ByteSink should be ready for further Append() calls after Flush().
|
||||
* The default implementation of Flush() does nothing.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Flush();
|
||||
|
||||
private:
|
||||
ByteSink(const ByteSink &) = delete;
|
||||
ByteSink &operator=(const ByteSink &) = delete;
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------
|
||||
// Some standard implementations
|
||||
|
||||
/**
|
||||
* Implementation of ByteSink that writes to a flat byte array,
|
||||
* with bounds-checking:
|
||||
* This sink will not write more than capacity bytes to outbuf.
|
||||
* If more than capacity bytes are Append()ed, then excess bytes are ignored,
|
||||
* and Overflowed() will return true.
|
||||
* Overflow does not cause a runtime error.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API CheckedArrayByteSink : public ByteSink {
|
||||
public:
|
||||
/**
|
||||
* Constructs a ByteSink that will write to outbuf[0..capacity-1].
|
||||
* @param outbuf buffer to write to
|
||||
* @param capacity size of the buffer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
CheckedArrayByteSink(char* outbuf, int32_t capacity);
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual ~CheckedArrayByteSink();
|
||||
/**
|
||||
* Returns the sink to its original state, without modifying the buffer.
|
||||
* Useful for reusing both the buffer and the sink for multiple streams.
|
||||
* Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
|
||||
* and Overflowed()=false.
|
||||
* @return *this
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
virtual CheckedArrayByteSink& Reset();
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* bytes, int32_t n);
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* *result_capacity. For details see the base class documentation.
|
||||
* @param min_capacity required minimum capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param desired_capacity_hint desired capacity of the returned buffer;
|
||||
* must be non-negative
|
||||
* @param scratch default caller-owned buffer
|
||||
* @param scratch_capacity capacity of the scratch buffer
|
||||
* @param result_capacity pointer to an integer which will be set to the
|
||||
* capacity of the returned buffer
|
||||
* @return a buffer with *result_capacity>=min_capacity
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual char* GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch, int32_t scratch_capacity,
|
||||
int32_t* result_capacity);
|
||||
/**
|
||||
* Returns the number of bytes actually written to the sink.
|
||||
* @return number of bytes written to the buffer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t NumberOfBytesWritten() const { return size_; }
|
||||
/**
|
||||
* Returns true if any bytes were discarded, i.e., if there was an
|
||||
* attempt to write more than 'capacity' bytes.
|
||||
* @return true if more than 'capacity' bytes were Append()ed
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
UBool Overflowed() const { return overflowed_; }
|
||||
/**
|
||||
* Returns the number of bytes appended to the sink.
|
||||
* If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
|
||||
* else they return the same number.
|
||||
* @return number of bytes appended to the sink
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
int32_t NumberOfBytesAppended() const { return appended_; }
|
||||
private:
|
||||
char* outbuf_;
|
||||
const int32_t capacity_;
|
||||
int32_t size_;
|
||||
int32_t appended_;
|
||||
UBool overflowed_;
|
||||
|
||||
CheckedArrayByteSink() = delete;
|
||||
CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
|
||||
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
|
||||
};
|
||||
|
||||
/**
|
||||
* Implementation of ByteSink that writes to a "string".
|
||||
* The StringClass is usually instantiated with a std::string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
template<typename StringClass>
|
||||
class StringByteSink : public ByteSink {
|
||||
public:
|
||||
/**
|
||||
* Constructs a ByteSink that will append bytes to the dest string.
|
||||
* @param dest pointer to string object to append to
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringByteSink(StringClass* dest) : dest_(dest) { }
|
||||
/**
|
||||
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
|
||||
*
|
||||
* @param dest pointer to string object to append to
|
||||
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
|
||||
* @stable ICU 60
|
||||
*/
|
||||
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
|
||||
if (initialAppendCapacity > 0 &&
|
||||
(uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
|
||||
dest->reserve(dest->length() + initialAppendCapacity);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param data the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
|
||||
private:
|
||||
StringClass* dest_;
|
||||
|
||||
StringByteSink() = delete;
|
||||
StringByteSink(const StringByteSink &) = delete;
|
||||
StringByteSink &operator=(const StringByteSink &) = delete;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __BYTESTREAM_H__
|
||||
@@ -1,313 +0,0 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// char16ptr.h
|
||||
// created: 2017feb28 Markus W. Scherer
|
||||
|
||||
#ifndef __CHAR16PTR_H__
|
||||
#define __CHAR16PTR_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: char16_t pointer wrappers with
|
||||
* implicit conversion from bit-compatible raw pointer types.
|
||||
* Also conversion functions from char16_t * to UChar * and OldUChar *.
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \def U_ALIASING_BARRIER
|
||||
* Barrier for pointer anti-aliasing optimizations even across function boundaries.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
// Use the predefined value.
|
||||
#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
|
||||
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
|
||||
#elif defined(U_IN_DOXYGEN)
|
||||
# define U_ALIASING_BARRIER(ptr)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API Char16Ptr U_FINAL {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(uint16_t *p);
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(std::nullptr_t p);
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~Char16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator char16_t *() const { return get(); }
|
||||
|
||||
private:
|
||||
Char16Ptr() = delete;
|
||||
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
template<typename T> static char16_t *cast(T *t) {
|
||||
U_ALIASING_BARRIER(t);
|
||||
return reinterpret_cast<char16_t *>(t);
|
||||
}
|
||||
|
||||
char16_t *p_;
|
||||
#else
|
||||
union {
|
||||
char16_t *cp;
|
||||
uint16_t *up;
|
||||
wchar_t *wp;
|
||||
} u_;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
|
||||
Char16Ptr::~Char16Ptr() {
|
||||
U_ALIASING_BARRIER(p_);
|
||||
}
|
||||
|
||||
char16_t *Char16Ptr::get() const { return p_; }
|
||||
|
||||
#else
|
||||
|
||||
Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
|
||||
#endif
|
||||
Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
|
||||
Char16Ptr::~Char16Ptr() {}
|
||||
|
||||
char16_t *Char16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API ConstChar16Ptr U_FINAL {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to const char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const uint16_t *p);
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Converts the pointer to const char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const std::nullptr_t p);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~ConstChar16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator const char16_t *() const { return get(); }
|
||||
|
||||
private:
|
||||
ConstChar16Ptr() = delete;
|
||||
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
template<typename T> static const char16_t *cast(const T *t) {
|
||||
U_ALIASING_BARRIER(t);
|
||||
return reinterpret_cast<const char16_t *>(t);
|
||||
}
|
||||
|
||||
const char16_t *p_;
|
||||
#else
|
||||
union {
|
||||
const char16_t *cp;
|
||||
const uint16_t *up;
|
||||
const wchar_t *wp;
|
||||
} u_;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
|
||||
#endif
|
||||
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
|
||||
ConstChar16Ptr::~ConstChar16Ptr() {
|
||||
U_ALIASING_BARRIER(p_);
|
||||
}
|
||||
|
||||
const char16_t *ConstChar16Ptr::get() const { return p_; }
|
||||
|
||||
#else
|
||||
|
||||
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
|
||||
#endif
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
|
||||
#endif
|
||||
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
|
||||
ConstChar16Ptr::~ConstChar16Ptr() {}
|
||||
|
||||
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* Converts from const char16_t * to const UChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const UChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const UChar *toUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<const UChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from char16_t * to UChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as UChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline UChar *toUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<UChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from const char16_t * to const OldUChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const OldUChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<const OldUChar *>(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts from char16_t * to OldUChar *.
|
||||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as OldUChar *
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline OldUChar *toOldUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
U_ALIASING_BARRIER(p);
|
||||
#endif
|
||||
return reinterpret_cast<OldUChar *>(p);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __CHAR16PTR_H__
|
||||
@@ -1,734 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CHARITER_H
|
||||
#define CHARITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Character Iterator
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* Abstract class that defines an API for forward-only iteration
|
||||
* on text objects.
|
||||
* This is a minimal interface for iteration without random access
|
||||
* or backwards iteration. It is especially useful for wrapping
|
||||
* streams with converters into an object for collation or
|
||||
* normalization.
|
||||
*
|
||||
* <p>Characters can be accessed in two ways: as code units or as
|
||||
* code points.
|
||||
* Unicode code points are 21-bit integers and are the scalar values
|
||||
* of Unicode characters. ICU uses the type UChar32 for them.
|
||||
* Unicode code units are the storage units of a given
|
||||
* Unicode/UCS Transformation Format (a character encoding scheme).
|
||||
* With UTF-16, all code points can be represented with either one
|
||||
* or two code units ("surrogates").
|
||||
* String storage is typically based on code units, while properties
|
||||
* of characters are typically determined using code point values.
|
||||
* Some processes may be designed to work with sequences of code units,
|
||||
* or it may be known that all characters that are important to an
|
||||
* algorithm can be represented with single code units.
|
||||
* Other processes will need to use the code point access functions.</p>
|
||||
*
|
||||
* <p>ForwardCharacterIterator provides nextPostInc() to access
|
||||
* a code unit and advance an internal position into the text object,
|
||||
* similar to a <code>return text[position++]</code>.<br>
|
||||
* It provides next32PostInc() to access a code point and advance an internal
|
||||
* position.</p>
|
||||
*
|
||||
* <p>next32PostInc() assumes that the current position is that of
|
||||
* the beginning of a code point, i.e., of its first code unit.
|
||||
* After next32PostInc(), this will be true again.
|
||||
* In general, access to code units and code points in the same
|
||||
* iteration loop should not be mixed. In UTF-16, if the current position
|
||||
* is on a second code unit (Low Surrogate), then only that code unit
|
||||
* is returned even by next32PostInc().</p>
|
||||
*
|
||||
* <p>For iteration with either function, there are two ways to
|
||||
* check for the end of the iteration. When there are no more
|
||||
* characters in the text object:
|
||||
* <ul>
|
||||
* <li>The hasNext() function returns false.</li>
|
||||
* <li>nextPostInc() and next32PostInc() return DONE
|
||||
* when one attempts to read beyond the end of the text object.</li>
|
||||
* </ul>
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* void function1(ForwardCharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* while(it.hasNext()) {
|
||||
* c=it.next32PostInc();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* void function2(ForwardCharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </p>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
    /**
     * Sentinel handed back by most ForwardCharacterIterator functions
     * once the iterator has reached the limit of its iteration.
     * @stable ICU 2.0
     */
    enum { DONE = 0xffff };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~ForwardCharacterIterator();

    /**
     * Equality test.
     * Two iterators are equal when they refer to the same character
     * in the same character-storage object.
     * @param that The ForwardCharacterIterator to compare with
     * @return true when both iterators refer to the same
     *         character in the same character-storage object
     * @stable ICU 2.0
     */
    virtual UBool operator==(const ForwardCharacterIterator &that) const = 0;

    /**
     * Inequality test.
     * Two iterators differ when they refer to different text-storage
     * objects, or to different characters in the same text-storage object.
     * @param that The ForwardCharacterIterator to compare with
     * @return true when the iterators refer to different
     *         text-storage objects, or to different characters in the
     *         same text-storage object
     * @stable ICU 2.0
     */
    inline UBool operator!=(const ForwardCharacterIterator &that) const;

    /**
     * Computes a hash code for this iterator.
     * @return the hash code.
     * @stable ICU 2.0
     */
    virtual int32_t hashCode() const = 0;

    /**
     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
     * RTTI").<P> Although this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
     * @return a UClassID for this ForwardCharacterIterator
     * @stable ICU 2.0
     */
    virtual UClassID getDynamicClassID() const = 0;

    /**
     * Post-increment code unit access: fetches the current code unit,
     * then advances to the next code unit in the iteration range
     * (toward endIndex()). Returns DONE when there are
     * no more code units to return.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t nextPostInc() = 0;

    /**
     * Post-increment code point access: fetches the current code point,
     * then advances to the next code point in the iteration range
     * (toward endIndex()). Returns DONE when there are
     * no more code points to return.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32PostInc() = 0;

    /**
     * Tells whether forward iteration can continue.
     * Intended for use together with nextPostInc() or next32PostInc().
     * @return false if there are no more code units or code points
     *         at or after the current position in the iteration range.
     * @stable ICU 2.0
     */
    virtual UBool hasNext() = 0;

protected:
    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator();

    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0 */
    ForwardCharacterIterator(const ForwardCharacterIterator &other);

    /**
     * Assignment operator to be overridden in the implementing class.
     * This base-class version copies nothing and simply returns *this.
     * @stable ICU 2.0
     */
    ForwardCharacterIterator &operator=(const ForwardCharacterIterator &) { return *this; }
};
|
||||
|
||||
/**
|
||||
* Abstract class that defines an API for iteration
|
||||
* on text objects.
|
||||
* This is an interface for forward and backward iteration
|
||||
* and random access into a text object.
|
||||
*
|
||||
* <p>The API provides backward compatibility to the Java and older ICU
|
||||
* CharacterIterator classes but extends them significantly:
|
||||
* <ol>
|
||||
* <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
|
||||
* <li>While the old API functions provided forward iteration with
|
||||
* "pre-increment" semantics, the new one also provides functions
|
||||
* with "post-increment" semantics. They are more efficient and should
|
||||
* be the preferred iterator functions for new implementations.
|
||||
* The backward iteration always had "pre-decrement" semantics, which
|
||||
* are efficient.</li>
|
||||
* <li>Just like ForwardCharacterIterator, it provides access to
|
||||
* both code units and code points. Code point access versions are available
|
||||
* for the old and the new iteration semantics.</li>
|
||||
* <li>There are new functions for setting and moving the current position
|
||||
* without returning a character, for efficiency.</li>
|
||||
* </ol>
|
||||
*
|
||||
* See ForwardCharacterIterator for examples for using the new forward iteration
|
||||
* functions. For backward iteration, there is also a hasPrevious() function
|
||||
* that can be used analogously to hasNext().
|
||||
* The old functions work as before and are shown below.</p>
|
||||
*
|
||||
* <p>Examples for some of the new functions:</p>
|
||||
*
|
||||
* Forward iteration with hasNext():
|
||||
* \code
|
||||
* void forward1(CharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* for(it.setToStart(); it.hasNext();) {
|
||||
* c=it.next32PostInc();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Forward iteration more similar to loops with the old forward iteration,
|
||||
* showing a way to convert simple for() loops:
|
||||
* \code
|
||||
* void forward2(CharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Backward iteration with setToEnd() and hasPrevious():
|
||||
* \code
|
||||
* void backward1(CharacterIterator &it) {
|
||||
* UChar32 c;
|
||||
* for(it.setToEnd(); it.hasPrevious();) {
|
||||
* c=it.previous32();
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* Backward iteration with a more traditional for() loop:
|
||||
* \code
|
||||
* void backward2(CharacterIterator &it) {
|
||||
* char16_t c;
|
||||
* for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
|
||||
* // use c
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* Example for random access:
|
||||
* \code
|
||||
* void random(CharacterIterator &it) {
|
||||
* // set to the third code point from the beginning
|
||||
* it.move32(3, CharacterIterator::kStart);
|
||||
* // get a code point from here without moving the position
|
||||
* UChar32 c=it.current32();
|
||||
* // get the position
|
||||
* int32_t pos=it.getIndex();
|
||||
* // get the previous code unit
|
||||
* char16_t u=it.previous();
|
||||
* // move back one more code unit
|
||||
* it.move(-1, CharacterIterator::kCurrent);
|
||||
* // set the position back to where it was
|
||||
* // and read the same code point c and move beyond it
|
||||
* it.setIndex(pos);
|
||||
* if(c!=it.next32PostInc()) {
|
||||
* exit(1); // CharacterIterator inconsistent
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* <p>Examples, especially for the old API:</p>
|
||||
*
|
||||
* Function processing characters, in this example simple output
|
||||
* <pre>
|
||||
* \code
|
||||
* void processChar( char16_t c )
|
||||
* {
|
||||
* cout << " " << c;
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse the text from start to finish
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseForward(CharacterIterator& iter)
|
||||
* {
|
||||
* for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse the text backwards, from end to start
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseBackward(CharacterIterator& iter)
|
||||
* {
|
||||
* for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Traverse both forward and backward from a given position in the text.
|
||||
* The Unicode::isLetter()/Unicode::isDigit() tests in this example represent some additional stopping criteria.
|
||||
* <pre>
|
||||
* \code
|
||||
* void traverseOut(CharacterIterator& iter, int32_t pos)
|
||||
* {
|
||||
* char16_t c;
|
||||
* for (c = iter.setIndex(pos);
|
||||
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
* c = iter.next()) {}
|
||||
* int32_t end = iter.getIndex();
|
||||
* for (c = iter.setIndex(pos);
|
||||
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
* c = iter.previous()) {}
|
||||
* int32_t start = iter.getIndex() + 1;
|
||||
*
|
||||
* cout << "start: " << start << " end: " << end << endl;
|
||||
* for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
||||
* processChar(c);
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Creating a StringCharacterIterator and calling the test functions
|
||||
* <pre>
|
||||
* \code
|
||||
* void CharacterIterator_Example( void )
|
||||
* {
|
||||
* cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
||||
* UnicodeString text("Ein kleiner Satz.");
|
||||
* StringCharacterIterator iterator(text);
|
||||
* cout << "----- traverseForward: -----------" << endl;
|
||||
* traverseForward( iterator );
|
||||
* cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
||||
* traverseBackward( iterator );
|
||||
* cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
||||
* traverseOut( iterator, 7 );
|
||||
* cout << endl << endl << "-----" << endl;
|
||||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
    /**
     * Reference points from which move() and move32() measure their delta.
     * @stable ICU 2.0
     */
    enum EOrigin { kStart, kCurrent, kEnd };

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~CharacterIterator();

    /**
     * Creates a new CharacterIterator of the same concrete class as this
     * one, referring to the same character in the same text-storage
     * object. The caller is responsible for deleting the new clone.
     * @return a pointer to a new CharacterIterator
     * @stable ICU 2.0
     */
    virtual CharacterIterator *clone() const = 0;

    /**
     * Positions the iterator on the first code unit of its iteration
     * range and returns that code unit.
     * Use this to begin an iteration with next().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t first() = 0;

    /**
     * Positions the iterator on the first code unit of its iteration
     * range, returns that code unit, and then moves the position to the
     * second code unit. An alternative to setToStart() for forward
     * iteration with nextPostInc().
     * @return the first code unit in its iteration range.
     * @stable ICU 2.0
     */
    virtual char16_t firstPostInc();

    /**
     * Positions the iterator on the first code point of its iteration
     * range and returns that code point.
     * Use this to begin an iteration with next32().
     * Note that an iteration with next32PostInc(), beginning with,
     * e.g., setToStart() or firstPostInc(), is more efficient.
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32() = 0;

    /**
     * Positions the iterator on the first code point of its iteration
     * range, returns that code point, and then moves the position to the
     * second code point. An alternative to setToStart() for forward
     * iteration with next32PostInc().
     * @return the first code point in its iteration range.
     * @stable ICU 2.0
     */
    virtual UChar32 first32PostInc();

    /**
     * Positions the iterator on the first code unit or code point of its
     * iteration range. Use this to begin a forward iteration with
     * nextPostInc() or next32PostInc().
     * @return the start position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToStart();

    /**
     * Positions the iterator on the last code unit of its iteration
     * range and returns that code unit.
     * Use this to begin an iteration with previous().
     * @return the last code unit.
     * @stable ICU 2.0
     */
    virtual char16_t last() = 0;

    /**
     * Positions the iterator on the last code point of its iteration
     * range and returns that code point.
     * Use this to begin an iteration with previous32().
     * @return the last code point.
     * @stable ICU 2.0
     */
    virtual UChar32 last32() = 0;

    /**
     * Positions the iterator at the end of its iteration range, just
     * behind the last code unit or code point. Use this to begin a
     * backward iteration with previous() or previous32().
     * @return the end position of the iteration range
     * @stable ICU 2.0
     */
    inline int32_t setToEnd();

    /**
     * Positions the iterator on the "position"-th code unit of the
     * text-storage object it refers to, and returns that code unit.
     * @param position the "position"-th code unit in the text-storage object
     * @return the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual char16_t setIndex(int32_t position) = 0;

    /**
     * Positions the iterator at the beginning of the code point that
     * contains the "position"-th code unit of the text-storage object
     * it refers to, and returns that code point.
     * The current position is adjusted to the beginning of the code point
     * (its first code unit).
     * @param position the "position"-th code unit in the text-storage object
     * @return the code point that contains the "position"-th code unit.
     * @stable ICU 2.0
     */
    virtual UChar32 setIndex32(int32_t position) = 0;

    /**
     * Reads the code unit at the current position without moving.
     * @return the current code unit.
     * @stable ICU 2.0
     */
    virtual char16_t current() const = 0;

    /**
     * Reads the code point at the current position without moving.
     * @return the current code point.
     * @stable ICU 2.0
     */
    virtual UChar32 current32() const = 0;

    /**
     * Pre-increment code unit access: advances to the next code unit in
     * the iteration range (toward endIndex()) and returns that code unit.
     * Returns DONE when there are no more code units to return.
     * @return the next code unit.
     * @stable ICU 2.0
     */
    virtual char16_t next() = 0;

    /**
     * Pre-increment code point access: advances to the next code point in
     * the iteration range (toward endIndex()) and returns that code point.
     * Returns DONE when there are no more code points to return.
     * Note that iteration with "pre-increment" semantics is less
     * efficient than iteration with "post-increment" semantics
     * that is provided by next32PostInc().
     * @return the next code point.
     * @stable ICU 2.0
     */
    virtual UChar32 next32() = 0;

    /**
     * Moves to the previous code unit in the iteration range
     * (toward startIndex()) and returns that code unit.
     * Returns DONE when there are no more code units to return.
     * @return the previous code unit.
     * @stable ICU 2.0
     */
    virtual char16_t previous() = 0;

    /**
     * Moves to the previous code point in the iteration range
     * (toward startIndex()) and returns that code point.
     * Returns DONE when there are no more code points to return.
     * @return the previous code point.
     * @stable ICU 2.0
     */
    virtual UChar32 previous32() = 0;

    /**
     * Tells whether backward iteration can continue.
     * Intended for use together with previous() or previous32().
     * @return false if there are no more code units or code points
     *         before the current position in the iteration range,
     *         true otherwise.
     * @stable ICU 2.0
     */
    virtual UBool hasPrevious() = 0;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character returned by first(). Because an iterator
     * may cover only part of a text-storage object, this number is not
     * necessarily 0.
     * @return the numeric index in the underlying text-storage
     *         object of the character returned by first().
     * @stable ICU 2.0
     */
    inline int32_t startIndex() const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the position immediately BEYOND the character
     * returned by last().
     * @return the numeric index in the underlying text-storage
     *         object of the position immediately BEYOND the character
     *         returned by last().
     * @stable ICU 2.0
     */
    inline int32_t endIndex() const;

    /**
     * Returns the numeric index in the underlying text-storage
     * object of the character the iterator currently refers to
     * (i.e., the character returned by current()).
     * @return the numeric index in the text-storage object of
     *         the character the iterator currently refers to
     * @stable ICU 2.0
     */
    inline int32_t getIndex() const;

    /**
     * Returns the length of the entire text in the underlying
     * text-storage object.
     * @return the length of the entire text in the text-storage object
     * @stable ICU 2.0
     */
    inline int32_t getLength() const;

    /**
     * Moves the current position by a number of code units, measured
     * from the start or end of the iteration range or from the current
     * position itself. A positive delta moves forward, a negative delta
     * moves backward.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move(int32_t delta, EOrigin origin) = 0;

#ifdef move32
    // One of the system headers right now is sometimes defining a conflicting macro we don't use
#undef move32
#endif
    /**
     * Moves the current position by a number of code points, measured
     * from the start or end of the iteration range or from the current
     * position itself. A positive delta moves forward, a negative delta
     * moves backward.
     * @param delta the position relative to origin. A positive delta means forward;
     *              a negative delta means backward.
     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
     * @return the new position
     * @stable ICU 2.0
     */
    virtual int32_t move32(int32_t delta, EOrigin origin) = 0;

    /**
     * Copies the text under iteration into the UnicodeString
     * referred to by "result".
     * @param result Receives a copy of the text under iteration.
     * @stable ICU 2.0
     */
    virtual void getText(UnicodeString &result) = 0;

protected:
    /**
     * Empty constructor.
     * @stable ICU 2.0
     */
    CharacterIterator();

    /**
     * Constructor, just setting the length field in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length);

    /**
     * Constructor, just setting the length and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t position);

    /**
     * Constructor, just setting the length, start, end, and position fields in this base class.
     * @stable ICU 2.0
     */
    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);

    /**
     * Copy constructor.
     *
     * @param that The CharacterIterator to be copied
     * @stable ICU 2.0
     */
    CharacterIterator(const CharacterIterator &that);

    /**
     * Assignment operator. Makes this CharacterIterator behave the same
     * as the one passed in.
     * @param that The CharacterIterator passed in.
     * @return the newly set CharacterIterator.
     * @stable ICU 2.0
     */
    CharacterIterator &operator=(const CharacterIterator &that);

    /**
     * Base class text length field.
     * Necessary for correct getText() and hashCode().
     * @stable ICU 2.0
     */
    int32_t textLength;

    /**
     * Base class field for the current position.
     * @stable ICU 2.0
     */
    int32_t pos;

    /**
     * Base class field for the start of the iteration range.
     * @stable ICU 2.0
     */
    int32_t begin;

    /**
     * Base class field for the end of the iteration range.
     * @stable ICU 2.0
     */
    int32_t end;
};
|
||||
|
||||
// Inequality is simply the negation of the (virtual) equality operator.
inline UBool ForwardCharacterIterator::operator!=(const ForwardCharacterIterator &that) const {
    const UBool same = this->operator==(that);
    return !same;
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::setToStart() {
|
||||
return move(0, kStart);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::setToEnd() {
|
||||
return move(0, kEnd);
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::startIndex(void) const {
|
||||
return begin;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::endIndex(void) const {
|
||||
return end;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::getIndex(void) const {
|
||||
return pos;
|
||||
}
|
||||
|
||||
inline int32_t
|
||||
CharacterIterator::getLength(void) const {
|
||||
return textLength;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
@@ -1,595 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: localpointer.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009nov13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __LOCALPOINTER_H__
|
||||
#define __LOCALPOINTER_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
|
||||
*
|
||||
* These classes are inspired by
|
||||
* - std::auto_ptr
|
||||
* - boost::scoped_ptr & boost::scoped_array
|
||||
* - Taligent Safe Pointers (TOnlyPointerTo)
|
||||
*
|
||||
* but none of those provide for all of the goals for ICU smart pointers:
|
||||
* - Smart pointer owns the object and releases it when it goes out of scope.
|
||||
* - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
|
||||
* - ICU-compatible: No exceptions.
|
||||
* - Need to be able to orphan/release the pointer and its ownership.
|
||||
* - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
|
||||
*
|
||||
* For details see http://site.icu-project.org/design/cpp/scoped_ptr
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <memory>
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* "Smart pointer" base class; do not use directly: use LocalPointer etc.
|
||||
*
|
||||
* Base class for smart pointer classes that do not throw exceptions.
|
||||
*
|
||||
* Do not use this base class directly, since it does not delete its pointer.
|
||||
* A subclass must implement methods that delete the pointer:
|
||||
* Destructor and adoptInstead().
|
||||
*
|
||||
* There is no operator T *() provided because the programmer must decide
|
||||
* whether to use getAlias() (without transfer of ownership) or orphan()
|
||||
* (with transfer of ownership and NULLing of the pointer).
|
||||
*
|
||||
* @see LocalPointer
|
||||
* @see LocalArray
|
||||
* @see U_DEFINE_LOCAL_OPEN_POINTER
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalPointerBase {
|
||||
public:
|
||||
// No heap allocation. Use only on the stack.
|
||||
static void* U_EXPORT2 operator new(size_t) = delete;
|
||||
static void* U_EXPORT2 operator new[](size_t) = delete;
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
static void* U_EXPORT2 operator new(size_t, void*) = delete;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
|
||||
/**
|
||||
* Destructor deletes the object it owns.
|
||||
* Subclass must override: Base class does nothing.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalPointerBase() { /* delete ptr; */ }
|
||||
/**
|
||||
* NULL check.
|
||||
* @return true if ==NULL
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UBool isNull() const { return ptr==NULL; }
|
||||
/**
|
||||
* NULL check.
|
||||
* @return true if !=NULL
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
UBool isValid() const { return ptr!=NULL; }
|
||||
/**
|
||||
* Comparison with a simple pointer, so that existing code
|
||||
* with ==NULL need not be changed.
|
||||
* @param other simple pointer for comparison
|
||||
* @return true if this pointer value equals other
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
bool operator==(const T *other) const { return ptr==other; }
|
||||
/**
|
||||
* Comparison with a simple pointer, so that existing code
|
||||
* with !=NULL need not be changed.
|
||||
* @param other simple pointer for comparison
|
||||
* @return true if this pointer value differs from other
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
bool operator!=(const T *other) const { return ptr!=other; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *getAlias() const { return ptr; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value as a reference
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T &operator*() const { return *ptr; }
|
||||
/**
|
||||
* Access without ownership change.
|
||||
* @return the pointer value
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *operator->() const { return ptr; }
|
||||
/**
|
||||
* Gives up ownership; the internal pointer becomes NULL.
|
||||
* @return the pointer value;
|
||||
* caller becomes responsible for deleting the object
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T *orphan() {
|
||||
T *p=ptr;
|
||||
ptr=NULL;
|
||||
return p;
|
||||
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* Subclass must override: Base class does not delete the object.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
// delete ptr;
|
||||
ptr=p;
|
||||
}
|
||||
protected:
|
||||
/**
|
||||
* Actual pointer.
|
||||
* @internal
|
||||
*/
|
||||
T *ptr;
|
||||
private:
|
||||
// No comparison operators with other LocalPointerBases.
|
||||
bool operator==(const LocalPointerBase<T> &other);
|
||||
bool operator!=(const LocalPointerBase<T> &other);
|
||||
// No ownership sharing: No copy constructor, no assignment operator.
|
||||
LocalPointerBase(const LocalPointerBase<T> &other);
|
||||
void operator=(const LocalPointerBase<T> &other);
|
||||
};
|
||||
|
||||
/**
|
||||
* "Smart pointer" class, deletes objects via the standard C++ delete operator.
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
|
||||
* int32_t length=s->length(); // 2
|
||||
* char16_t lead=s->charAt(0); // 0xd900
|
||||
* if(some condition) { return; } // no need to explicitly delete the pointer
|
||||
* s.adoptInstead(new UnicodeString((char16_t)0xfffc));
|
||||
* length=s->length(); // 1
|
||||
* // no need to explicitly delete the pointer
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalPointer : public LocalPointerBase<T> {
|
||||
public:
|
||||
using LocalPointerBase<T>::operator*;
|
||||
using LocalPointerBase<T>::operator->;
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
|
||||
/**
|
||||
* Constructor takes ownership and reports an error if NULL.
|
||||
*
|
||||
* This constructor is intended to be used with other-class constructors
|
||||
* that may report a failure UErrorCode,
|
||||
* so that callers need to check only for U_FAILURE(errorCode)
|
||||
* and not also separately for isNull().
|
||||
*
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 55
|
||||
*/
|
||||
LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
|
||||
if(p==NULL && U_SUCCESS(errorCode)) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Move constructor, leaves src with isNull().
|
||||
* @param src source smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
|
||||
src.ptr=NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a LocalPointer from a C++11 std::unique_ptr.
|
||||
* The LocalPointer steals the object owned by the std::unique_ptr.
|
||||
*
|
||||
* This constructor works via move semantics. If your std::unique_ptr is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the pointer will be stolen.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
explicit LocalPointer(std::unique_ptr<T> &&p)
|
||||
: LocalPointerBase<T>(p.release()) {}
|
||||
|
||||
/**
|
||||
* Destructor deletes the object it owns.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalPointer() {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
}
|
||||
/**
|
||||
* Move assignment operator, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=NULL;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move-assign from an std::unique_ptr to this LocalPointer.
|
||||
* Steals the pointer from the std::unique_ptr.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the pointer will be stolen.
|
||||
* @return *this
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocalPointer<T> &operator=(std::unique_ptr<T> &&p) U_NOEXCEPT {
|
||||
adoptInstead(p.release());
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap pointers.
|
||||
* @param other other smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void swap(LocalPointer<T> &other) U_NOEXCEPT {
|
||||
T *temp=LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=other.ptr;
|
||||
other.ptr=temp;
|
||||
}
|
||||
/**
|
||||
* Non-member LocalPointer swap function.
|
||||
* @param p1 will get p2's pointer
|
||||
* @param p2 will get p1's pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) U_NOEXCEPT {
|
||||
p1.swap(p2);
|
||||
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
}
|
||||
/**
|
||||
* Deletes the object it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
*
|
||||
* If U_FAILURE(errorCode), then the current object is retained and the new one deleted.
|
||||
*
|
||||
* If U_SUCCESS(errorCode) but the input pointer is NULL,
|
||||
* then U_MEMORY_ALLOCATION_ERROR is set,
|
||||
* the current object is deleted, and NULL is set.
|
||||
*
|
||||
* @param p simple pointer to an object that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 55
|
||||
*/
|
||||
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
delete LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
if(p==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
} else {
|
||||
delete p;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Conversion operator to a C++11 std::unique_ptr.
|
||||
* Disowns the object and gives it to the returned std::unique_ptr.
|
||||
*
|
||||
* This operator works via move semantics. If your LocalPointer is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @return An std::unique_ptr owning the pointer previously owned by this
|
||||
* icu::LocalPointer.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
operator std::unique_ptr<T> () && {
|
||||
return std::unique_ptr<T>(LocalPointerBase<T>::orphan());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* "Smart pointer" class, deletes objects via the C++ array delete[] operator.
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
* Adds operator[] for array item access.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalArray<UnicodeString> a(new UnicodeString[2]);
|
||||
* a[0].append((char16_t)0x61);
|
||||
* if(some condition) { return; } // no need to explicitly delete the array
|
||||
* a.adoptInstead(new UnicodeString[4]);
|
||||
* a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
|
||||
* // no need to explicitly delete the array
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
template<typename T>
|
||||
class LocalArray : public LocalPointerBase<T> {
|
||||
public:
|
||||
using LocalPointerBase<T>::operator*;
|
||||
using LocalPointerBase<T>::operator->;
|
||||
/**
|
||||
* Constructor takes ownership.
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
|
||||
/**
|
||||
* Constructor takes ownership and reports an error if NULL.
|
||||
*
|
||||
* This constructor is intended to be used with other-class constructors
|
||||
* that may report a failure UErrorCode,
|
||||
* so that callers need to check only for U_FAILURE(errorCode)
|
||||
* and not also separately for isNull().
|
||||
*
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
|
||||
if(p==NULL && U_SUCCESS(errorCode)) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Move constructor, leaves src with isNull().
|
||||
* @param src source smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
|
||||
src.ptr=NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a LocalArray from a C++11 std::unique_ptr of an array type.
|
||||
* The LocalPointer steals the array owned by the std::unique_ptr.
|
||||
*
|
||||
* This constructor works via move semantics. If your std::unique_ptr is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the array will be stolen.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
explicit LocalArray(std::unique_ptr<T[]> &&p)
|
||||
: LocalPointerBase<T>(p.release()) {}
|
||||
|
||||
/**
|
||||
* Destructor deletes the array it owns.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
~LocalArray() {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
}
|
||||
/**
|
||||
* Move assignment operator, leaves src with isNull().
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source smart pointer
|
||||
* @return *this
|
||||
* @stable ICU 56
|
||||
*/
|
||||
LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=NULL;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move-assign from an std::unique_ptr to this LocalPointer.
|
||||
* Steals the array from the std::unique_ptr.
|
||||
*
|
||||
* @param p The std::unique_ptr from which the array will be stolen.
|
||||
* @return *this
|
||||
* @stable ICU 64
|
||||
*/
|
||||
LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) U_NOEXCEPT {
|
||||
adoptInstead(p.release());
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap pointers.
|
||||
* @param other other smart pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void swap(LocalArray<T> &other) U_NOEXCEPT {
|
||||
T *temp=LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=other.ptr;
|
||||
other.ptr=temp;
|
||||
}
|
||||
/**
|
||||
* Non-member LocalArray swap function.
|
||||
* @param p1 will get p2's pointer
|
||||
* @param p2 will get p1's pointer
|
||||
* @stable ICU 56
|
||||
*/
|
||||
friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) U_NOEXCEPT {
|
||||
p1.swap(p2);
|
||||
}
|
||||
/**
|
||||
* Deletes the array it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
void adoptInstead(T *p) {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
}
|
||||
/**
|
||||
* Deletes the array it owns,
|
||||
* and adopts (takes ownership of) the one passed in.
|
||||
*
|
||||
* If U_FAILURE(errorCode), then the current array is retained and the new one deleted.
|
||||
*
|
||||
* If U_SUCCESS(errorCode) but the input pointer is NULL,
|
||||
* then U_MEMORY_ALLOCATION_ERROR is set,
|
||||
* the current array is deleted, and NULL is set.
|
||||
*
|
||||
* @param p simple pointer to an array of T objects that is adopted
|
||||
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
|
||||
* if p==NULL and no other failure code had been set
|
||||
* @stable ICU 56
|
||||
*/
|
||||
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
LocalPointerBase<T>::ptr=p;
|
||||
if(p==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
} else {
|
||||
delete[] p;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Array item access (writable).
|
||||
* No index bounds check.
|
||||
* @param i array index
|
||||
* @return reference to the array item
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
|
||||
|
||||
/**
|
||||
* Conversion operator to a C++11 std::unique_ptr.
|
||||
* Disowns the object and gives it to the returned std::unique_ptr.
|
||||
*
|
||||
* This operator works via move semantics. If your LocalPointer is
|
||||
* in a local variable, you must use std::move.
|
||||
*
|
||||
* @return An std::unique_ptr owning the pointer previously owned by this
|
||||
* icu::LocalPointer.
|
||||
* @stable ICU 64
|
||||
*/
|
||||
operator std::unique_ptr<T[]> () && {
|
||||
return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \def U_DEFINE_LOCAL_OPEN_POINTER
|
||||
* "Smart pointer" definition macro, deletes objects via the closeFunction.
|
||||
* Defines a subclass of LocalPointerBase which works just
|
||||
* like LocalPointer<Type> except that this subclass will use the closeFunction
|
||||
* rather than the C++ delete operator.
|
||||
*
|
||||
* Usage example:
|
||||
* \code
|
||||
* LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
|
||||
* utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
|
||||
* utf8Out, (int32_t)sizeof(utf8Out),
|
||||
* utf8In, utf8InLength, &errorCode);
|
||||
* if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
|
||||
* \endcode
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
    class LocalPointerClassName : public LocalPointerBase<Type> { \
    public: \
        using LocalPointerBase<Type>::operator*; \
        using LocalPointerBase<Type>::operator->; \
        explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
        LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \
                : LocalPointerBase<Type>(src.ptr) { \
            src.ptr=NULL; \
        } \
        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
        explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \
                : LocalPointerBase<Type>(p.release()) {} \
        ~LocalPointerClassName() { if (ptr != NULL) { closeFunction(ptr); } } \
        /* Move assignment closes the currently owned object and takes over src's. */ \
        /* Self-move-assignment is a no-op: the owned object is kept and not closed. */ \
        LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \
            if (this != &src) { \
                if (ptr != NULL) { closeFunction(ptr); } \
                LocalPointerBase<Type>::ptr=src.ptr; \
                src.ptr=NULL; \
            } \
            return *this; \
        } \
        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
        LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \
            adoptInstead(p.release()); \
            return *this; \
        } \
        void swap(LocalPointerClassName &other) U_NOEXCEPT { \
            Type *temp=LocalPointerBase<Type>::ptr; \
            LocalPointerBase<Type>::ptr=other.ptr; \
            other.ptr=temp; \
        } \
        friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
            p1.swap(p2); \
        } \
        /* Closes the currently owned object (if any) and takes ownership of p. */ \
        void adoptInstead(Type *p) { \
            if (ptr != NULL) { closeFunction(ptr); } \
            ptr=p; \
        } \
        operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \
            return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \
        } \
    }
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
#endif /* __LOCALPOINTER_H__ */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,94 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 03/14/00 aliu Creation.
|
||||
* 06/27/00 aliu Change from C++ class to C struct
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef PARSEERR_H
|
||||
#define PARSEERR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Parse Error Information
|
||||
*/
|
||||
/**
 * Array length of the preContext and postContext buffers in UParseError.
 * @stable ICU 2.0
 */
enum { U_PARSE_CONTEXT_LEN = 16 };
|
||||
|
||||
/**
|
||||
* A UParseError struct is used to return detailed information about
|
||||
* parsing errors. It is used by ICU parsing engines that parse long
|
||||
* rules, patterns, or programs, where the text being parsed is long
|
||||
* enough that more information than a UErrorCode is needed to
|
||||
* localize the error.
|
||||
*
|
||||
* <p>The line, offset, and context fields are optional; parsing
|
||||
* engines may choose not to use them.
|
||||
*
|
||||
* <p>The preContext and postContext strings include some part of the
|
||||
* context surrounding the error. If the source text is "let for=7"
|
||||
* and "for" is the error (e.g., because it is a reserved word), then
|
||||
* some examples of what a parser might produce are the following:
|
||||
*
|
||||
* <pre>
|
||||
* preContext postContext
|
||||
* "" "" The parser does not support context
|
||||
* "let " "=7" Pre- and post-context only
|
||||
* "let " "for=7" Pre- and post-context and error text
|
||||
* "" "for" Error text only
|
||||
* </pre>
|
||||
*
|
||||
* <p>Examples of engines which use UParseError (or may use it in the
|
||||
* future) are Transliterator, RuleBasedBreakIterator, and
|
||||
* RegexPattern.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UParseError {
|
||||
|
||||
/**
|
||||
* The line on which the error occurred. If the parser uses this
|
||||
* field, it sets it to the line number of the source text line on
|
||||
* which the error appears, which will be a value >= 1. If the
|
||||
* parse does not support line numbers, the value will be <= 0.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t line;
|
||||
|
||||
/**
|
||||
* The character offset to the error. If the line field is >= 1,
|
||||
* then this is the offset from the start of the line. Otherwise,
|
||||
* this is the offset from the start of the text. If the parser
|
||||
* does not support this field, it will have a value < 0.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t offset;
|
||||
|
||||
/**
|
||||
* Textual context before the error. Null-terminated. The empty
|
||||
* string if not supported by parser.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar preContext[U_PARSE_CONTEXT_LEN];
|
||||
|
||||
/**
|
||||
* The error itself and/or textual context after the error.
|
||||
* Null-terminated. The empty string if not supported by parser.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UChar postContext[U_PARSE_CONTEXT_LEN];
|
||||
|
||||
} UParseError;
|
||||
|
||||
#endif
|
||||
@@ -1,885 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : platform.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _PLATFORM_H
|
||||
#define _PLATFORM_H
|
||||
|
||||
#include "unicode/uconfig.h"
|
||||
#include "unicode/uvernum.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Basic types for the platform.
|
||||
*
|
||||
* This file used to be generated by autoconf/configure.
|
||||
* Starting with ICU 49, platform.h is a normal source file,
|
||||
* to simplify cross-compiling and working with non-autoconf/make build systems.
|
||||
*
|
||||
* When a value in this file does not work on a platform, then please
|
||||
* try to derive it from the U_PLATFORM value
|
||||
* (for which we might need a new value constant in rare cases)
|
||||
* and/or from other macros that are predefined by the compiler
|
||||
* or defined in standard (POSIX or platform or compiler) headers.
|
||||
*
|
||||
* As a temporary workaround, you can add an explicit \#define for some macros
|
||||
* before it is first tested, or add an equivalent -D macro definition
|
||||
* to the compiler's command line.
|
||||
*
|
||||
* Note: Some compilers provide ways to show the predefined macros.
|
||||
* For example, with gcc you can compile an empty .c file and have the compiler
|
||||
* print the predefined macros with
|
||||
* \code
|
||||
* gcc -E -dM -x c /dev/null | sort
|
||||
* \endcode
|
||||
* (You can provide an actual empty .c file rather than /dev/null.
|
||||
* <code>-x c++</code> is for C++.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Define some things so that they can be documented.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_IN_DOXYGEN
|
||||
/*
|
||||
* Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
|
||||
* Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
|
||||
*/
|
||||
|
||||
/* None for now. */
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM
|
||||
* The U_PLATFORM macro defines the platform we're on.
|
||||
*
|
||||
* We used to define one different, value-less macro per platform.
|
||||
* That made it hard to know the set of relevant platforms and macros,
|
||||
* and hard to deal with variants of platforms.
|
||||
*
|
||||
* Starting with ICU 49, we define platforms as numeric macros,
|
||||
* with ranges of values for related platforms and their variants.
|
||||
* The U_PLATFORM macro is set to one of these values.
|
||||
*
|
||||
* Historical note from the Solaris Wikipedia article:
|
||||
* AT&T and Sun collaborated on a project to merge the most popular Unix variants
|
||||
* on the market at that time: BSD, System V, and Xenix.
|
||||
* This became Unix System V Release 4 (SVR4).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
/** Unknown platform. @internal */
|
||||
#define U_PF_UNKNOWN 0
|
||||
/** Windows @internal */
|
||||
#define U_PF_WINDOWS 1000
|
||||
/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
|
||||
#define U_PF_MINGW 1800
|
||||
/**
|
||||
* Cygwin. Windows, calls to cygwin1.dll for Posix functions,
|
||||
* using MSVC or GNU gcc and binutils.
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_CYGWIN 1900
|
||||
/* Reserve 2000 for U_PF_UNIX? */
|
||||
/** HP-UX is based on UNIX System V. @internal */
|
||||
#define U_PF_HPUX 2100
|
||||
/** Solaris is a Unix operating system based on SVR4. @internal */
|
||||
#define U_PF_SOLARIS 2600
|
||||
/** BSD is a UNIX operating system derivative. @internal */
|
||||
#define U_PF_BSD 3000
|
||||
/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
|
||||
#define U_PF_AIX 3100
|
||||
/** IRIX is based on UNIX System V with BSD extensions. @internal */
|
||||
#define U_PF_IRIX 3200
|
||||
/**
|
||||
* Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
|
||||
* as well as code derived from NeXTSTEP, BSD, and other projects,
|
||||
* built around the Mach kernel.
|
||||
* Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
|
||||
* (Original description modified from Wikipedia.)
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_DARWIN 3500
|
||||
/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
|
||||
#define U_PF_IPHONE 3550
|
||||
/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
|
||||
#define U_PF_QNX 3700
|
||||
/** Linux is a Unix-like operating system. @internal */
|
||||
#define U_PF_LINUX 4000
|
||||
/**
|
||||
* Native Client is pretty close to Linux.
|
||||
* See https://developer.chrome.com/native-client and
|
||||
* http://www.chromium.org/nativeclient
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_BROWSER_NATIVE_CLIENT 4020
|
||||
/** Android is based on Linux. @internal */
|
||||
#define U_PF_ANDROID 4050
|
||||
/** Fuchsia is a POSIX-ish platform. @internal */
|
||||
#define U_PF_FUCHSIA 4100
|
||||
/* Maximum value for Linux-based platform is 4499 */
|
||||
/**
|
||||
* Emscripten is a C++ transpiler for the Web that can target asm.js or
|
||||
* WebAssembly. It provides some POSIX-compatible wrappers and stubs and
|
||||
* some Linux-like functionality, but is not fully compatible with
|
||||
* either.
|
||||
* @internal
|
||||
*/
|
||||
#define U_PF_EMSCRIPTEN 5010
|
||||
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
|
||||
#define U_PF_OS390 9000
|
||||
/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
|
||||
#define U_PF_OS400 9400
|
||||
|
||||
#ifdef U_PLATFORM
|
||||
/* Use the predefined value. */
|
||||
#elif defined(__MINGW32__)
|
||||
# define U_PLATFORM U_PF_MINGW
|
||||
#elif defined(__CYGWIN__)
|
||||
# define U_PLATFORM U_PF_CYGWIN
|
||||
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
# define U_PLATFORM U_PF_WINDOWS
|
||||
#elif defined(__ANDROID__)
|
||||
# define U_PLATFORM U_PF_ANDROID
|
||||
/* Android wchar_t support depends on the API level. */
|
||||
# include <android/api-level.h>
|
||||
#elif defined(__pnacl__) || defined(__native_client__)
|
||||
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
|
||||
#elif defined(__Fuchsia__)
|
||||
# define U_PLATFORM U_PF_FUCHSIA
|
||||
#elif defined(linux) || defined(__linux__) || defined(__linux)
|
||||
# define U_PLATFORM U_PF_LINUX
|
||||
#elif defined(__APPLE__) && defined(__MACH__)
|
||||
# include <TargetConditionals.h>
|
||||
# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */
|
||||
# define U_PLATFORM U_PF_IPHONE
|
||||
# else
|
||||
# define U_PLATFORM U_PF_DARWIN
|
||||
# endif
|
||||
#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
|
||||
# if defined(__FreeBSD__)
|
||||
# include <sys/endian.h>
|
||||
# endif
|
||||
# define U_PLATFORM U_PF_BSD
|
||||
#elif defined(sun) || defined(__sun)
|
||||
/* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
|
||||
# define U_PLATFORM U_PF_SOLARIS
|
||||
# if defined(__GNUC__)
|
||||
/* Solaris/GCC needs this header file to get the proper endianness. Normally, this
|
||||
* header file is included with stddef.h but on Solaris/GCC, the GCC version of stddef.h
|
||||
* is included which does not include this header file.
|
||||
*/
|
||||
# include <sys/isa_defs.h>
|
||||
# endif
|
||||
#elif defined(_AIX) || defined(__TOS_AIX__)
|
||||
# define U_PLATFORM U_PF_AIX
|
||||
#elif defined(_hpux) || defined(hpux) || defined(__hpux)
|
||||
# define U_PLATFORM U_PF_HPUX
|
||||
#elif defined(sgi) || defined(__sgi)
|
||||
# define U_PLATFORM U_PF_IRIX
|
||||
#elif defined(__QNX__) || defined(__QNXNTO__)
|
||||
# define U_PLATFORM U_PF_QNX
|
||||
#elif defined(__TOS_MVS__)
|
||||
# define U_PLATFORM U_PF_OS390
|
||||
#elif defined(__OS400__) || defined(__TOS_OS400__)
|
||||
# define U_PLATFORM U_PF_OS400
|
||||
#elif defined(__EMSCRIPTEN__)
|
||||
# define U_PLATFORM U_PF_EMSCRIPTEN
|
||||
#else
|
||||
# define U_PLATFORM U_PF_UNKNOWN
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def CYGWINMSVC
|
||||
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
|
||||
* Otherwise undefined.
|
||||
* @internal
|
||||
*/
|
||||
/* Commented out because this is already set in mh-cygwin-msvc
|
||||
#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
|
||||
# define CYGWINMSVC
|
||||
#endif
|
||||
*/
|
||||
#ifdef U_IN_DOXYGEN
|
||||
# define CYGWINMSVC
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_USES_ONLY_WIN32_API
|
||||
* Defines whether the platform uses only the Win32 API.
|
||||
* Set to 1 for Windows/MSVC and MinGW but not Cygwin.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_USES_ONLY_WIN32_API
|
||||
/* Use the predefined value. */
|
||||
#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
|
||||
# define U_PLATFORM_USES_ONLY_WIN32_API 1
|
||||
#else
|
||||
/* Cygwin implements POSIX. */
|
||||
# define U_PLATFORM_USES_ONLY_WIN32_API 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_HAS_WIN32_API
|
||||
* Defines whether the Win32 API is available on the platform.
|
||||
* Set to 1 for Windows/MSVC, MinGW and Cygwin.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_HAS_WIN32_API
|
||||
/* Use the predefined value. */
|
||||
#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
|
||||
# define U_PLATFORM_HAS_WIN32_API 1
|
||||
#else
|
||||
# define U_PLATFORM_HAS_WIN32_API 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_HAS_WINUWP_API
|
||||
* Defines whether target is intended for Universal Windows Platform API
|
||||
* Set to 1 for Windows10 Release Solution Configuration
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_HAS_WINUWP_API
|
||||
/* Use the predefined value. */
|
||||
#else
|
||||
# define U_PLATFORM_HAS_WINUWP_API 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IMPLEMENTS_POSIX
|
||||
* Defines whether the platform implements (most of) the POSIX API.
|
||||
* Set to 1 for Cygwin and most other platforms.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_IMPLEMENTS_POSIX
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
# define U_PLATFORM_IMPLEMENTS_POSIX 0
|
||||
#else
|
||||
# define U_PLATFORM_IMPLEMENTS_POSIX 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IS_LINUX_BASED
|
||||
* Defines whether the platform is Linux or one of its derivatives.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_IS_LINUX_BASED
|
||||
/* Use the predefined value. */
|
||||
#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499
|
||||
# define U_PLATFORM_IS_LINUX_BASED 1
|
||||
#else
|
||||
# define U_PLATFORM_IS_LINUX_BASED 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_IS_DARWIN_BASED
|
||||
* Defines whether the platform is Darwin or one of its derivatives.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_PLATFORM_IS_DARWIN_BASED
|
||||
/* Use the predefined value. */
|
||||
#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
|
||||
# define U_PLATFORM_IS_DARWIN_BASED 1
|
||||
#else
|
||||
# define U_PLATFORM_IS_DARWIN_BASED 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_STDINT_H
|
||||
* Defines whether stdint.h is available. It is a C99 standard header.
|
||||
* We used to include inttypes.h which includes stdint.h but we usually do not need
|
||||
* the additional definitions from inttypes.h.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_STDINT_H
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
|
||||
/* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
|
||||
# define U_HAVE_STDINT_H 1
|
||||
# else
|
||||
# define U_HAVE_STDINT_H 0
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_SOLARIS
|
||||
/* Solaris has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_STDINT_H 0
|
||||
#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
|
||||
/* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_STDINT_H 0
|
||||
#else
|
||||
# define U_HAVE_STDINT_H 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_INTTYPES_H
|
||||
* Defines whether inttypes.h is available. It is a C99 standard header.
|
||||
* We include inttypes.h where it is available but stdint.h is not.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_INTTYPES_H
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_SOLARIS
|
||||
/* Solaris has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_INTTYPES_H 1
|
||||
#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
|
||||
/* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
|
||||
# define U_HAVE_INTTYPES_H 1
|
||||
#else
|
||||
/* Most platforms have both inttypes.h and stdint.h, or neither. */
|
||||
# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Compiler and environment features */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_GCC_MAJOR_MINOR
|
||||
* Indicates whether the compiler is gcc (test for != 0),
|
||||
* and if so, contains its major (times 100) and minor version numbers.
|
||||
* If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
|
||||
*
|
||||
* For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
|
||||
* use "#if U_GCC_MAJOR_MINOR >= 406".
|
||||
* @internal
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
#else
|
||||
# define U_GCC_MAJOR_MINOR 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_IS_BIG_ENDIAN
|
||||
* Determines the endianness of the platform.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_IS_BIG_ENDIAN
|
||||
/* Use the predefined value. */
|
||||
#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
|
||||
# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
|
||||
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
|
||||
/* gcc */
|
||||
# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
||||
#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
|
||||
# define U_IS_BIG_ENDIAN 0
|
||||
#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
|
||||
/* These platforms do not appear to predefine any endianness macros. */
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
|
||||
/* HPPA does not appear to predefine any endianness macros. */
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
|
||||
/* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
|
||||
# define U_IS_BIG_ENDIAN 1
|
||||
#else
|
||||
# define U_IS_BIG_ENDIAN 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_PLACEMENT_NEW
|
||||
* Determines whether to override placement new and delete for STL.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifdef U_HAVE_PLACEMENT_NEW
|
||||
/* Use the predefined value. */
|
||||
#elif defined(__BORLANDC__)
|
||||
# define U_HAVE_PLACEMENT_NEW 0
|
||||
#else
|
||||
# define U_HAVE_PLACEMENT_NEW 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_HAVE_DEBUG_LOCATION_NEW
|
||||
* Define this to define the MFC debug version of the operator new.
|
||||
*
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
#ifdef U_HAVE_DEBUG_LOCATION_NEW
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER)
|
||||
# define U_HAVE_DEBUG_LOCATION_NEW 1
|
||||
#else
|
||||
# define U_HAVE_DEBUG_LOCATION_NEW 0
|
||||
#endif
|
||||
|
||||
/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */
|
||||
#ifdef __has_attribute
|
||||
# define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x)
|
||||
#else
|
||||
# define UPRV_HAS_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
#ifdef __has_cpp_attribute
|
||||
# define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
|
||||
#else
|
||||
# define UPRV_HAS_CPP_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
#ifdef __has_declspec_attribute
|
||||
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x)
|
||||
#else
|
||||
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0
|
||||
#endif
|
||||
#ifdef __has_builtin
|
||||
# define UPRV_HAS_BUILTIN(x) __has_builtin(x)
|
||||
#else
|
||||
# define UPRV_HAS_BUILTIN(x) 0
|
||||
#endif
|
||||
#ifdef __has_feature
|
||||
# define UPRV_HAS_FEATURE(x) __has_feature(x)
|
||||
#else
|
||||
# define UPRV_HAS_FEATURE(x) 0
|
||||
#endif
|
||||
#ifdef __has_extension
|
||||
# define UPRV_HAS_EXTENSION(x) __has_extension(x)
|
||||
#else
|
||||
# define UPRV_HAS_EXTENSION(x) 0
|
||||
#endif
|
||||
#ifdef __has_warning
|
||||
# define UPRV_HAS_WARNING(x) __has_warning(x)
|
||||
#else
|
||||
# define UPRV_HAS_WARNING(x) 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_MALLOC_ATTR
|
||||
* Attribute to mark functions as malloc-like
|
||||
* @internal
|
||||
*/
|
||||
#if defined(__GNUC__) && __GNUC__>=3
|
||||
# define U_MALLOC_ATTR __attribute__ ((__malloc__))
|
||||
#else
|
||||
# define U_MALLOC_ATTR
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ALLOC_SIZE_ATTR
|
||||
* Attribute to specify the size of the allocated buffer for malloc-like functions
|
||||
* @internal
|
||||
*/
|
||||
#if (defined(__GNUC__) && \
|
||||
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || \
|
||||
UPRV_HAS_ATTRIBUTE(alloc_size)
|
||||
# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
|
||||
# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
|
||||
#else
|
||||
# define U_ALLOC_SIZE_ATTR(X)
|
||||
# define U_ALLOC_SIZE_ATTR2(X,Y)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CPLUSPLUS_VERSION
|
||||
* 0 if no C++; 1, 11, 14, ... if C++.
|
||||
* Support for specific features cannot always be determined by the C++ version alone.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_CPLUSPLUS_VERSION
|
||||
# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus)
|
||||
# undef U_CPLUSPLUS_VERSION
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
# endif
|
||||
/* Otherwise use the predefined value. */
|
||||
#elif !defined(__cplusplus)
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
|
||||
# define U_CPLUSPLUS_VERSION 14
|
||||
#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
|
||||
# define U_CPLUSPLUS_VERSION 11
|
||||
#else
|
||||
// C++98 or C++03
|
||||
# define U_CPLUSPLUS_VERSION 1
|
||||
#endif
|
||||
|
||||
#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
|
||||
// add in std::nullptr_t
|
||||
namespace std {
|
||||
typedef decltype(nullptr) nullptr_t;
|
||||
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_NOEXCEPT
|
||||
* "noexcept" if supported, otherwise empty.
|
||||
* Some code, especially STL containers, uses move semantics of objects only
|
||||
* if the move constructor and the move operator are declared as not throwing exceptions.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_NOEXCEPT
|
||||
/* Use the predefined value. */
|
||||
#else
|
||||
# define U_NOEXCEPT noexcept
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_FALLTHROUGH
|
||||
* Annotate intentional fall-through between switch labels.
|
||||
* http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
|
||||
* @internal
|
||||
*/
|
||||
#ifndef __cplusplus
|
||||
// Not for C.
|
||||
#elif defined(U_FALLTHROUGH)
|
||||
// Use the predefined value.
|
||||
#elif defined(__clang__)
|
||||
// Test for compiler vs. feature separately.
|
||||
// Other compilers might choke on the feature test.
|
||||
# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \
|
||||
(UPRV_HAS_FEATURE(cxx_attributes) && \
|
||||
UPRV_HAS_WARNING("-Wimplicit-fallthrough"))
|
||||
# define U_FALLTHROUGH [[clang::fallthrough]]
|
||||
# endif
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 7)
|
||||
# define U_FALLTHROUGH __attribute__((fallthrough))
|
||||
#endif
|
||||
|
||||
#ifndef U_FALLTHROUGH
|
||||
# define U_FALLTHROUGH
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_ASCII_FAMILY 0
|
||||
|
||||
/**
|
||||
* U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_EBCDIC_FAMILY 1
|
||||
|
||||
/**
|
||||
* \def U_CHARSET_FAMILY
|
||||
*
|
||||
* <p>These definitions make it possible to specify the encoding of text
|
||||
* in the char data type as defined by the platform and the compiler.
|
||||
* It is enough to determine the code point values of "invariant characters",
|
||||
* which are the ones shared by all encodings that are in use
|
||||
* on a given platform.</p>
|
||||
*
|
||||
* <p>Those "invariant characters" should be all the uppercase and lowercase
|
||||
* latin letters, the digits, the space, and "basic punctuation".
|
||||
* Also, '\\n', '\\r', '\\t' should be available.</p>
|
||||
*
|
||||
* <p>The list of "invariant characters" is:<br>
|
||||
* \code
|
||||
* A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _
|
||||
* \endcode
|
||||
* <br>
|
||||
* (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
|
||||
*
|
||||
* <p>This matches the IBM Syntactic Character Set (CS 640).</p>
|
||||
*
|
||||
* <p>In other words, all the graphic characters in 7-bit ASCII should
|
||||
* be safely accessible except the following:</p>
|
||||
*
|
||||
* \code
|
||||
* '\' <backslash>
|
||||
* '[' <left bracket>
|
||||
* ']' <right bracket>
|
||||
* '{' <left brace>
|
||||
* '}' <right brace>
|
||||
* '^' <circumflex>
|
||||
* '~' <tilde>
|
||||
* '!' <exclamation mark>
|
||||
* '#' <number sign>
|
||||
* '|' <vertical line>
|
||||
* '$' <dollar sign>
|
||||
* '@' <commercial at>
|
||||
* '`' <grave accent>
|
||||
* \endcode
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_CHARSET_FAMILY
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
|
||||
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
|
||||
#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
|
||||
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
|
||||
#else
|
||||
# define U_CHARSET_FAMILY U_ASCII_FAMILY
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CHARSET_IS_UTF8
|
||||
*
|
||||
* Hardcode the default charset to UTF-8.
|
||||
*
|
||||
* If this is set to 1, then
|
||||
* - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
|
||||
* contain UTF-8 text, regardless of what the system API uses
|
||||
* - some ICU code will use fast functions like u_strFromUTF8()
|
||||
* rather than the more general and more heavy-weight conversion API (ucnv.h)
|
||||
* - ucnv_getDefaultName() always returns "UTF-8"
|
||||
* - ucnv_setDefaultName() is disabled and will not change the default charset
|
||||
* - static builds of ICU are smaller
|
||||
* - more functionality is available with the UCONFIG_NO_CONVERSION build-time
|
||||
* configuration option (see unicode/uconfig.h)
|
||||
* - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
* @see UCONFIG_NO_CONVERSION
|
||||
*/
|
||||
#ifdef U_CHARSET_IS_UTF8
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \
|
||||
U_PLATFORM == U_PF_EMSCRIPTEN
|
||||
# define U_CHARSET_IS_UTF8 1
|
||||
#else
|
||||
# define U_CHARSET_IS_UTF8 0
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Information about wchar support */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_HAVE_WCHAR_H
|
||||
* Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_HAVE_WCHAR_H
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
|
||||
/*
|
||||
* Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
|
||||
* The type and header existed, but the library functions did not work as expected.
|
||||
* The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
|
||||
*/
|
||||
# define U_HAVE_WCHAR_H 0
|
||||
#else
|
||||
# define U_HAVE_WCHAR_H 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_SIZEOF_WCHAR_T
|
||||
* U_SIZEOF_WCHAR_T==sizeof(wchar_t)
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef U_SIZEOF_WCHAR_T
|
||||
/* Use the predefined value. */
|
||||
#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
|
||||
/*
|
||||
* Android before Gingerbread (Android 2.3, API level 9): wchar_t was not usable
|
||||
* and its size was 1 (see the U_HAVE_WCHAR_H notes above).
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 1
|
||||
#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
#elif U_PLATFORM == U_PF_AIX
|
||||
/*
|
||||
* AIX 6.1 information, section "Wide character data representation":
|
||||
* "... the wchar_t datatype is 32-bit in the 64-bit environment and
|
||||
* 16-bit in the 32-bit environment."
|
||||
* and
|
||||
* "All locales use Unicode for their wide character code values (process code),
|
||||
* except the IBM-eucTW codeset."
|
||||
*/
|
||||
# ifdef __64BIT__
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# else
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_OS390
|
||||
/*
|
||||
* z/OS V1R11 information center, section "LP64 | ILP32":
|
||||
* "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
|
||||
* Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
|
||||
*/
|
||||
# ifdef _LP64
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# else
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#elif U_PLATFORM == U_PF_OS400
|
||||
# if defined(__UTF32__)
|
||||
/*
|
||||
* LOCALETYPE(*LOCALEUTF) is specified.
|
||||
* Wide-character strings are in UTF-32,
|
||||
* narrow-character strings are in UTF-8.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
# elif defined(__UCS2__)
|
||||
/*
|
||||
* LOCALETYPE(*LOCALEUCS2) is specified.
|
||||
* Wide-character strings are in UCS-2,
|
||||
* narrow-character strings are in EBCDIC.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# else
|
||||
/*
|
||||
* LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
|
||||
* Wide-character strings are in 16-bit EBCDIC,
|
||||
* narrow-character strings are in EBCDIC.
|
||||
*/
|
||||
# define U_SIZEOF_WCHAR_T 2
|
||||
# endif
|
||||
#else
|
||||
# define U_SIZEOF_WCHAR_T 4
|
||||
#endif
|
||||
|
||||
/*
 * U_HAVE_WCSCPY
 * Unless it has been predefined, this takes the same value as U_HAVE_WCHAR_H.
 */
#ifndef U_HAVE_WCSCPY
|
||||
#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* \def U_HAVE_CHAR16_T
|
||||
* Defines whether the char16_t type is available for UTF-16
|
||||
* and u"abc" UTF-16 string literals are supported.
|
||||
* This is a new standard type and standard string literal syntax in C++0x
|
||||
* but has been available in some compilers before.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_CHAR16_T
|
||||
/* Use the predefined value. */
|
||||
#else
|
||||
/*
|
||||
* Notes:
|
||||
* Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
|
||||
* and does not support u"abc" string literals.
|
||||
* Visual Studio 2015 (_MSC_VER>=1900) and above adds support for
|
||||
* both char16_t and u"abc" string literals.
|
||||
* gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
|
||||
* does not support u"abc" string literals.
|
||||
* C++11 and C11 require support for UTF-16 literals
|
||||
* TODO: Fix for plain C. Doesn't work on Mac.
|
||||
*/
|
||||
# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
|
||||
# define U_HAVE_CHAR16_T 1
|
||||
# else
|
||||
# define U_HAVE_CHAR16_T 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @{
|
||||
* \def U_DECLARE_UTF16
|
||||
* Do not use this macro because it is not defined on all platforms.
|
||||
* Use the UNICODE_STRING or U_STRING_DECL macros instead.
|
||||
* @internal
|
||||
*/
|
||||
/*
 * Selection order for U_DECLARE_UTF16(string):
 *  1. a predefined U_DECLARE_UTF16 is kept as is;
 *  2. u"..." literals when U_HAVE_CHAR16_T is set, or for the listed
 *     xlC / HP compilers, or when running under Doxygen (U_IN_DOXYGEN);
 *  3. L"..." literals when wchar_t is 2 bytes wide and the charset family is
 *     ASCII, or the platform is in the OS390..OS400 range with __UCS2__ defined;
 *  4. otherwise the macro stays undefined.
 */
#ifdef U_DECLARE_UTF16
|
||||
/* Use the predefined value. */
|
||||
#elif U_HAVE_CHAR16_T \
|
||||
|| (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
|
||||
|| (defined(__HP_aCC) && __HP_aCC >= 035000) \
|
||||
|| (defined(__HP_cc) && __HP_cc >= 111106) \
|
||||
|| (defined(U_IN_DOXYGEN))
|
||||
# define U_DECLARE_UTF16(string) u ## string
|
||||
#elif U_SIZEOF_WCHAR_T == 2 \
|
||||
&& (U_CHARSET_FAMILY == U_ASCII_FAMILY || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
|
||||
# define U_DECLARE_UTF16(string) L ## string
|
||||
#else
|
||||
/* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/** @{ Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
/*
 * U_EXPORT
 * Unless it has been predefined, this is chosen as follows:
 *  - empty when U_STATIC_IMPLEMENTATION is defined;
 *  - __declspec(dllexport) when _MSC_VER is defined, or when the compiler
 *    reports both the dllexport and dllimport declspec attributes;
 *  - __attribute__((visibility("default"))) when __GNUC__ is defined;
 *  - __global for __SUNPRO_CC / __SUNPRO_C values of 0x550 and above;
 *  - empty otherwise.
 * The HP aCC / HP cc branch below is commented out and has no effect.
 */
#ifdef U_EXPORT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_STATIC_IMPLEMENTATION)
|
||||
# define U_EXPORT
|
||||
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
|
||||
UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
|
||||
# define U_EXPORT __declspec(dllexport)
|
||||
#elif defined(__GNUC__)
|
||||
# define U_EXPORT __attribute__((visibility("default")))
|
||||
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
|
||||
|| (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
|
||||
# define U_EXPORT __global
|
||||
/*#elif defined(__HP_aCC) || defined(__HP_cc)
|
||||
# define U_EXPORT __declspec(dllexport)*/
|
||||
#else
|
||||
# define U_EXPORT
|
||||
#endif
|
||||
|
||||
/* U_CALLCONV is related to U_EXPORT2 */
|
||||
/*
 * U_EXPORT2
 * Unless it has been predefined, this is __cdecl when _MSC_VER is defined
 * and empty otherwise.
 */
#ifdef U_EXPORT2
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER)
|
||||
# define U_EXPORT2 __cdecl
|
||||
#else
|
||||
# define U_EXPORT2
|
||||
#endif
|
||||
|
||||
/*
 * U_IMPORT
 * Unless it has been predefined, this is __declspec(dllimport) when _MSC_VER
 * is defined or when the compiler reports both the dllexport and dllimport
 * declspec attributes; it is empty otherwise.
 */
#ifdef U_IMPORT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
|
||||
UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
|
||||
/* Windows needs to export/import data. */
|
||||
# define U_IMPORT __declspec(dllimport)
|
||||
#else
|
||||
# define U_IMPORT
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CALLCONV
|
||||
* Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
|
||||
* in callback function typedefs to make sure that the calling convention
|
||||
* is compatible.
|
||||
*
|
||||
* This is only used for non-ICU-API functions.
|
||||
* When a function is a public ICU API,
|
||||
* you must use the U_CAPI and U_EXPORT2 qualifiers.
|
||||
*
|
||||
* Please note, you need to use U_CALLCONV after the *.
|
||||
*
|
||||
* NO : "static const char U_CALLCONV *func( . . . )"
|
||||
* YES: "static const char* U_CALLCONV func( . . . )"
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
|
||||
# define U_CALLCONV __cdecl
|
||||
#else
|
||||
# define U_CALLCONV U_EXPORT2
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CALLCONV_FPTR
|
||||
* Similar to U_CALLCONV, but only used on function pointers.
|
||||
* @internal
|
||||
*/
|
||||
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
|
||||
# define U_CALLCONV_FPTR U_CALLCONV
|
||||
#else
|
||||
# define U_CALLCONV_FPTR
|
||||
#endif
|
||||
/** @} */
|
||||
|
||||
#endif // _PLATFORM_H
|
||||
@@ -1,130 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : ptypes.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Definitions of integer types of various widths
|
||||
*/
|
||||
|
||||
#ifndef _PTYPES_H
|
||||
#define _PTYPES_H
|
||||
|
||||
/**
|
||||
* \def __STDC_LIMIT_MACROS
|
||||
* According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
|
||||
* macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
|
||||
* We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
|
||||
* that uses such limit macros.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef __STDC_LIMIT_MACROS
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#endif
|
||||
|
||||
/* NULL, size_t, wchar_t */
|
||||
#include <stddef.h>
|
||||
|
||||
/*
|
||||
* If all compilers provided all of the C99 headers and types,
|
||||
* we would just unconditionally #include <stdint.h> here
|
||||
* and not need any of the stuff after including platform.h.
|
||||
*/
|
||||
|
||||
/* Find out if we have stdint.h etc. */
|
||||
#include "unicode/platform.h"
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <stdint.h> header, you may
|
||||
need to edit the typedefs in the #else section below.
|
||||
Use #if...#else...#endif with predefined compiler macros if possible. */
|
||||
#if U_HAVE_STDINT_H
|
||||
|
||||
/*
|
||||
* We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
|
||||
* <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
|
||||
* which we almost never use, plus stuff like imaxabs() which we never use.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
|
||||
#if U_PLATFORM == U_PF_OS390
|
||||
/* The features header is needed to get (u)int64_t sometimes. */
|
||||
#include <features.h>
|
||||
/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
|
||||
#if !defined(__uint8_t)
|
||||
#define __uint8_t 1
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
#endif /* U_PLATFORM == U_PF_OS390 */
|
||||
|
||||
#elif U_HAVE_INTTYPES_H
|
||||
|
||||
# include <inttypes.h>
|
||||
|
||||
#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
|
||||
|
||||
/// \cond
|
||||
#if ! U_HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_INT32_T
|
||||
typedef signed int int32_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT32_T
|
||||
typedef unsigned int uint32_t;
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_INT64_T
|
||||
#ifdef _MSC_VER
|
||||
typedef signed __int64 int64_t;
|
||||
#else
|
||||
typedef signed long long int64_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ! U_HAVE_UINT64_T
|
||||
#ifdef _MSC_VER
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#else
|
||||
typedef unsigned long long uint64_t;
|
||||
#endif
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
|
||||
|
||||
#endif /* _PTYPES_H */
|
||||
@@ -1,183 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : putil.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/14/98 nos Creation (content moved here from utypes.h).
|
||||
* 06/17/99 erm Added IEEE_754
|
||||
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
|
||||
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
|
||||
* 08/24/98 stephen Added longBitsFromDouble
|
||||
* 03/02/99 stephen Removed openFile(). Added AS400 support.
|
||||
* 04/15/99 stephen Converted to C
|
||||
* 11/15/99 helena Integrated S/390 changes for IEEE support.
|
||||
* 01/11/00 helena Added u_getVersion.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef PUTIL_H
|
||||
#define PUTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Platform Utilities
|
||||
*/
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Platform utilities */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* Platform utilities isolate the platform dependencies of the
|
||||
* library. For each platform which this code is ported to, these
|
||||
* functions may have to be re-implemented.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Return the ICU data directory.
|
||||
* The data directory is where common format ICU data files (.dat files)
|
||||
* are loaded from. Note that normal use of the built-in ICU
|
||||
* facilities does not require loading of an external data file;
|
||||
* unless you are adding custom data to ICU, the data directory
|
||||
* does not need to be set.
|
||||
*
|
||||
* The data directory is determined as follows:
|
||||
* If u_setDataDirectory() has been called, that is it, otherwise
|
||||
* if the ICU_DATA environment variable is set, use that, otherwise
|
||||
* if a data directory was specified at ICU build time
|
||||
* <code>
|
||||
* \code
|
||||
* #define ICU_DATA_DIR "path"
|
||||
* \endcode
|
||||
* </code> use that,
|
||||
* otherwise no data directory is available.
|
||||
*
|
||||
* @return the data directory, or an empty string ("") if no data directory has
|
||||
* been specified.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
|
||||
|
||||
|
||||
/**
|
||||
* Set the ICU data directory.
|
||||
* The data directory is where common format ICU data files (.dat files)
|
||||
* are loaded from. Note that normal use of the built-in ICU
|
||||
* facilities does not require loading of an external data file;
|
||||
* unless you are adding custom data to ICU, the data directory
|
||||
* does not need to be set.
|
||||
*
|
||||
* This function should be called at most once in a process, before the
|
||||
* first ICU operation (e.g., u_init()) that will require the loading of an
|
||||
* ICU data file.
|
||||
* This function is not thread-safe. Use it before calling ICU APIs from
|
||||
* multiple threads.
|
||||
*
|
||||
* @param directory The directory to be set.
|
||||
*
|
||||
* @see u_init
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Return the time zone files override directory, or an empty string if
|
||||
* no directory was specified. Certain time zone resources will be preferentially
|
||||
* loaded from individual files in this directory.
|
||||
*
|
||||
* @return the time zone data override directory.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Set the time zone files override directory.
|
||||
* This function is not thread safe; it must not be called concurrently with
|
||||
* u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
|
||||
* This function should only be called before using any ICU service that
|
||||
* will access the time zone data.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
|
||||
/**
|
||||
* @{
|
||||
* Filesystem file and path separator characters.
|
||||
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_PLATFORM_USES_ONLY_WIN32_API
|
||||
# define U_FILE_SEP_CHAR '\\'
|
||||
# define U_FILE_ALT_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ';'
|
||||
# define U_FILE_SEP_STRING "\\"
|
||||
# define U_FILE_ALT_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ";"
|
||||
#else
|
||||
# define U_FILE_SEP_CHAR '/'
|
||||
# define U_FILE_ALT_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ':'
|
||||
# define U_FILE_SEP_STRING "/"
|
||||
# define U_FILE_ALT_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ":"
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Convert char characters to UChar characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that are encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see U_CHARSET_FAMILY.
|
||||
*
|
||||
* @param cs Input string, points to <code>length</code>
|
||||
* character bytes from a subset of the platform encoding.
|
||||
* @param us Output string, points to memory for <code>length</code>
|
||||
* Unicode characters.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*
|
||||
* @see U_CHARSET_FAMILY
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_charsToUChars(const char *cs, UChar *us, int32_t length);
|
||||
|
||||
/**
|
||||
* Convert UChar characters to char characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that can be encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see U_CHARSET_FAMILY.
|
||||
*
|
||||
* @param us Input string, points to <code>length</code>
|
||||
* Unicode characters that can be encoded with the
|
||||
* codepage-invariant subset of the platform encoding.
|
||||
* @param cs Output string, points to memory for <code>length</code>
|
||||
* character bytes.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*
|
||||
* @see U_CHARSET_FAMILY
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_UCharsToChars(const UChar *us, char *cs, int32_t length);
|
||||
|
||||
#endif
|
||||
@@ -1,266 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 1999-2012, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
**************************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation. Ported from java. Modified to
|
||||
* match current UnicodeString API. Forced
|
||||
* to use name "handleReplaceBetween" because
|
||||
* of existing methods in UnicodeString.
|
||||
**************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef REP_H
|
||||
#define REP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Replaceable String
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* <code>Replaceable</code> is an abstract base class representing a
|
||||
* string of characters that supports the replacement of a range of
|
||||
* itself with a new string of characters. It is used by APIs that
|
||||
* change a piece of text while retaining metadata. Metadata is data
|
||||
* other than the Unicode characters returned by char32At(). One
|
||||
* example of metadata is style attributes; another is an edit
|
||||
* history, marking each character with an author and revision number.
|
||||
*
|
||||
* <p>An implicit aspect of the <code>Replaceable</code> API is that
|
||||
* during a replace operation, new characters take on the metadata of
|
||||
* the old characters. For example, if the string "the <b>bold</b>
|
||||
* font" has range (4, 8) replaced with "strong", then it becomes "the
|
||||
* <b>strong</b> font".
|
||||
*
|
||||
* <p><code>Replaceable</code> specifies ranges using a start
|
||||
* offset and a limit offset. The range of characters thus specified
|
||||
* includes the characters at offset start..limit-1. That is, the
|
||||
* start offset is inclusive, and the limit offset is exclusive.
|
||||
*
|
||||
* <p><code>Replaceable</code> also includes API to access characters
|
||||
* in the string: <code>length()</code>, <code>charAt()</code>,
|
||||
* <code>char32At()</code>, and <code>extractBetween()</code>.
|
||||
*
|
||||
* <p>For a subclass to support metadata, typical behavior of
|
||||
* <code>replace()</code> is the following:
|
||||
* <ul>
|
||||
* <li>Set the metadata of the new text to the metadata of the first
|
||||
* character replaced</li>
|
||||
* <li>If no characters are replaced, use the metadata of the
|
||||
* previous character</li>
|
||||
* <li>If there is no previous character (i.e. start == 0), use the
|
||||
* following character</li>
|
||||
* <li>If there is no following character (i.e. the replaceable was
|
||||
* empty), use default metadata.</li>
|
||||
* <li>If the code point U+FFFF is seen, it should be interpreted as
|
||||
* a special marker having no metadata
|
||||
* </li>
|
||||
* </ul>
|
||||
* If this is not the behavior, the subclass should document any differences.
|
||||
* @author Alan Liu
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_COMMON_API Replaceable : public UObject {
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual ~Replaceable();
|
||||
|
||||
/**
|
||||
* Returns the number of 16-bit code units in the text.
|
||||
* @return number of 16-bit code units in text
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline int32_t length() const;
|
||||
|
||||
/**
|
||||
* Returns the 16-bit code unit at the given offset into the text.
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return 16-bit code unit of text at given offset
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline char16_t charAt(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Returns the 32-bit code point at the given 16-bit offset into
|
||||
* the text. This assumes the text is stored as 16-bit code units
|
||||
* with surrogate pairs intermixed. If the offset of a leading or
|
||||
* trailing code unit of a surrogate pair is given, return the
|
||||
* code point of the surrogate pair.
|
||||
*
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return 32-bit code point of text at given offset
|
||||
* @stable ICU 1.8
|
||||
*/
|
||||
inline UChar32 char32At(int32_t offset) const;
|
||||
|
||||
/**
|
||||
* Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
|
||||
* into the UnicodeString <tt>target</tt>.
|
||||
* @param start offset of first character which will be copied
|
||||
* @param limit offset immediately following the last character to
|
||||
* be copied
|
||||
* @param target UnicodeString into which to copy characters.
|
||||
* (No value is returned; the copied characters are delivered through <TT>target</TT>.)
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
virtual void extractBetween(int32_t start,
|
||||
int32_t limit,
|
||||
UnicodeString& target) const = 0;
|
||||
|
||||
/**
|
||||
* Replaces a substring of this object with the given text. If the
|
||||
* characters being replaced have metadata, the new characters
|
||||
* that replace them should be given the same metadata.
|
||||
*
|
||||
* <p>Subclasses must ensure that if the text between start and
|
||||
* limit is equal to the replacement text, that replace has no
|
||||
* effect. That is, any metadata
|
||||
* should be unaffected. In addition, subclasses are encouraged to
|
||||
* check for initial and trailing identical characters, and make a
|
||||
* smaller replacement if possible. This will preserve as much
|
||||
* metadata as possible.
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= length()</code>.
|
||||
* @param text the text to replace characters <code>start</code>
|
||||
* to <code>limit - 1</code>
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void handleReplaceBetween(int32_t start,
|
||||
int32_t limit,
|
||||
const UnicodeString& text) = 0;
|
||||
// Note: All other methods in this class take the names of
|
||||
// existing UnicodeString methods. This method is the exception.
|
||||
// It is named differently because all replace methods of
|
||||
// UnicodeString return a UnicodeString&. The 'between' is
|
||||
// required in order to conform to the UnicodeString naming
|
||||
// convention; API taking start/length are named <operation>, and
|
||||
// those taking start/limit are named <operationBetween>. The
|
||||
// 'handle' is added because 'replaceBetween' and
|
||||
// 'doReplaceBetween' are already taken.
|
||||
|
||||
/**
|
||||
* Copies a substring of this object, retaining metadata.
|
||||
* This method is used to duplicate or reorder substrings.
|
||||
* The destination index must not overlap the source range.
|
||||
*
|
||||
* @param start the beginning index, inclusive; <code>0 <= start <=
|
||||
* limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit <=
|
||||
* length()</code>.
|
||||
* @param dest the destination index. The characters from
|
||||
* <code>start..limit-1</code> will be copied to <code>dest</code>.
|
||||
* Implementations of this method may assume that <code>dest <= start ||
|
||||
* dest >= limit</code>.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
|
||||
|
||||
/**
|
||||
* Returns true if this object contains metadata. If a
|
||||
* Replaceable object has metadata, calls to the Replaceable API
|
||||
* must be made so as to preserve metadata. If it does not, calls
|
||||
* to the Replaceable API may be optimized to improve performance.
|
||||
* The default implementation returns true.
|
||||
* @return true if this object contains metadata
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual UBool hasMetaData() const;
|
||||
|
||||
/**
|
||||
* Clone this object, an instance of a subclass of Replaceable.
|
||||
* Clones can be used concurrently in multiple threads.
|
||||
* If a subclass does not implement clone(), or if an error occurs,
|
||||
* then NULL is returned.
|
||||
* The caller must delete the clone.
|
||||
*
|
||||
* @return a clone of this object
|
||||
*
|
||||
* @see getDynamicClassID
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
virtual Replaceable *clone() const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
inline Replaceable();
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
Replaceable &Replaceable::operator=(const Replaceable &);
|
||||
*/
|
||||
|
||||
/**
|
||||
* Virtual version of length().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual int32_t getLength() const = 0;
|
||||
|
||||
/**
|
||||
* Virtual version of charAt().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual char16_t getCharAt(int32_t offset) const = 0;
|
||||
|
||||
/**
|
||||
* Virtual version of char32At().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual UChar32 getChar32At(int32_t offset) const = 0;
|
||||
};
|
||||
|
||||
inline Replaceable::Replaceable() {}
|
||||
|
||||
inline int32_t
|
||||
Replaceable::length() const {
|
||||
return getLength();
|
||||
}
|
||||
|
||||
inline char16_t
|
||||
Replaceable::charAt(int32_t offset) const {
|
||||
return getCharAt(offset);
|
||||
}
|
||||
|
||||
inline UChar32
|
||||
Replaceable::char32At(int32_t offset) const {
|
||||
return getChar32At(offset);
|
||||
}
|
||||
|
||||
// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
@@ -1,41 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: std_string.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009feb19
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __STD_STRING_H__
|
||||
#define __STD_STRING_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Central ICU header for including the C++ standard <string>
|
||||
* header and for related definitions.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
// Workaround for a libstdc++ bug before libstdc++4.6 (2011).
|
||||
// https://bugs.llvm.org/show_bug.cgi?id=13364
|
||||
#if defined(__GLIBCXX__)
|
||||
namespace std { class type_info; }
|
||||
#endif
|
||||
#include <string>
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __STD_STRING_H__
|
||||
@@ -1,281 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef STRENUM_H
|
||||
#define STRENUM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: String Enumeration
|
||||
*/
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Base class for 'pure' C++ implementations of uenum api. Adds a
|
||||
* method that returns the next UnicodeString since in C++ this can
|
||||
* be a common storage format for strings.
|
||||
*
|
||||
* <p>The model is that the enumeration is over strings maintained by
|
||||
* a 'service.' At any point, the service might change, invalidating
|
||||
* the enumerator (though this is expected to be rare). The iterator
|
||||
* returns an error if this has occurred. Lack of the error is no
|
||||
* guarantee that the service didn't change immediately after the
|
||||
* call, so the returned string still might not be 'valid' on
|
||||
* subsequent use.</p>
|
||||
*
|
||||
* <p>Strings may take the form of const char*, const char16_t*, or const
|
||||
* UnicodeString*. The type you get is determined by the variant of
|
||||
* 'next' that you call. In general the StringEnumeration is
|
||||
* optimized for one of these types, but all StringEnumerations can
|
||||
* return all types. Returned strings are each terminated with a NUL.
|
||||
* Depending on the service data, they might also include embedded NUL
|
||||
* characters, so API is provided to optionally return the true
|
||||
* length, counting the embedded NULs but not counting the terminating
|
||||
* NUL.</p>
|
||||
*
|
||||
* <p>The pointers returned by next, unext, and snext become invalid
|
||||
* upon any subsequent call to the enumeration's destructor, next,
|
||||
* unext, snext, or reset.</p>
|
||||
*
|
||||
* ICU 2.8 adds some default implementations and helper functions
|
||||
* for subclasses.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API StringEnumeration : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual ~StringEnumeration();
|
||||
|
||||
/**
|
||||
* Clone this object, an instance of a subclass of StringEnumeration.
|
||||
* Clones can be used concurrently in multiple threads.
|
||||
* If a subclass does not implement clone(), or if an error occurs,
|
||||
* then NULL is returned.
|
||||
* The caller must delete the clone.
|
||||
*
|
||||
* @return a clone of this object
|
||||
*
|
||||
* @see getDynamicClassID
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
virtual StringEnumeration *clone() const;
|
||||
|
||||
/**
|
||||
* <p>Return the number of elements that the iterator traverses. If
|
||||
* the iterator is out of sync with its service, status is set to
|
||||
* U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
|
||||
*
|
||||
* <p>The return value will not change except possibly as a result of
|
||||
* a subsequent call to reset, or if the iterator becomes out of sync.</p>
|
||||
*
|
||||
* <p>This is a convenience function. It can end up being very
|
||||
* expensive as all the items might have to be pre-fetched
|
||||
* (depending on the storage format of the data being
|
||||
* traversed).</p>
|
||||
*
|
||||
* @param status the error code.
|
||||
* @return number of elements in the iterator.
|
||||
*
|
||||
* @stable ICU 2.4 */
|
||||
virtual int32_t count(UErrorCode& status) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a NUL-terminated char*. If there
|
||||
* are no more elements, returns NULL. If the resultLength pointer
|
||||
* is not NULL, the length of the string (not counting the
|
||||
* terminating NUL) is returned at that address. If an error
|
||||
* status is returned, the value at resultLength is undefined.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
|
||||
*
|
||||
* <p>If the native service string is a char16_t* string, it is
|
||||
* converted to char* with the invariant converter. If the
|
||||
* conversion fails (because a character cannot be converted) then
|
||||
* status is set to U_INVARIANT_CONVERSION_ERROR and the return
|
||||
* value is undefined (though not NULL).</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls snext()
|
||||
* and handles the conversion.
|
||||
* Either next() or snext() must be implemented differently by a subclass.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @param resultLength a pointer to receive the length, can be NULL.
|
||||
* @return a pointer to the string, or NULL.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const char* next(int32_t *resultLength, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a NUL-terminated char16_t*. If there
|
||||
* are no more elements, returns NULL. If the resultLength pointer
|
||||
* is not NULL, the length of the string (not counting the
|
||||
* terminating NUL) is returned at that address. If an error
|
||||
* status is returned, the value at resultLength is undefined.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls snext()
|
||||
* and handles the conversion.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @param resultLength a pointer to receive the length, can be NULL.
|
||||
* @return a pointer to the string, or NULL.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Returns the next element as a UnicodeString*. If there are no
|
||||
* more elements, returns NULL.</p>
|
||||
*
|
||||
* <p>The returned pointer is owned by this iterator and must not be
|
||||
* deleted by the caller. The pointer is valid until the next call
|
||||
* to next, unext, snext, reset, or the enumerator's destructor.</p>
|
||||
*
|
||||
* <p>If the iterator is out of sync with its service, status is set
|
||||
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
|
||||
*
|
||||
* Starting with ICU 2.8, the default implementation calls next()
|
||||
* and handles the conversion.
|
||||
* Either next() or snext() must be implemented differently by a subclass.
|
||||
*
|
||||
* @param status the error code.
|
||||
* @return a pointer to the string, or NULL.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual const UnicodeString* snext(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Resets the iterator. This re-establishes sync with the
|
||||
* service and rewinds the iterator to start at the first
|
||||
* element.</p>
|
||||
*
|
||||
* <p>Previous pointers returned by next, unext, or snext become
|
||||
* invalid, and the value returned by count might change.</p>
|
||||
*
|
||||
* @param status the error code.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
virtual void reset(UErrorCode& status) = 0;
|
||||
|
||||
/**
|
||||
* Compares this enumeration to other to check if both are equal
|
||||
*
|
||||
* @param that The other string enumeration to compare this object to
|
||||
* @return true if the enumerations are equal. false if not.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
virtual UBool operator==(const StringEnumeration& that)const;
|
||||
/**
|
||||
* Compares this enumeration to other to check if both are not equal
|
||||
*
|
||||
* @param that The other string enumeration to compare this object to
|
||||
* @return true if the enumerations are not equal. false if they are equal.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
virtual UBool operator!=(const StringEnumeration& that)const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* UnicodeString field for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
UnicodeString unistr;
|
||||
/**
|
||||
* char * default buffer for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
char charsBuffer[32];
|
||||
/**
|
||||
* char * buffer for use with default implementations and subclasses.
|
||||
* Allocated in constructor and in ensureCharsCapacity().
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
char *chars;
|
||||
/**
|
||||
* Capacity of chars, for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
int32_t charsCapacity;
|
||||
|
||||
/**
|
||||
* Default constructor for use with default implementations and subclasses.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
StringEnumeration();
|
||||
|
||||
/**
|
||||
* Ensures that chars is at least as large as the requested capacity.
|
||||
* For use with default implementations and subclasses.
|
||||
*
|
||||
* @param capacity Requested capacity.
|
||||
* @param status ICU in/out error code.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Converts s to Unicode and sets unistr to the result.
|
||||
* For use with default implementations and subclasses,
|
||||
* especially for implementations of snext() in terms of next().
|
||||
* This is provided with a helper function instead of a default implementation
|
||||
* of snext() to avoid potential infinite loops between next() and snext().
|
||||
*
|
||||
* For example:
|
||||
* \code
|
||||
* const UnicodeString* snext(UErrorCode& status) {
|
||||
* int32_t resultLength=0;
|
||||
* const char *s=next(&resultLength, status);
|
||||
* return setChars(s, resultLength, status);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param s String to be converted to Unicode.
|
||||
* @param length Length of the string.
|
||||
* @param status ICU in/out error code.
|
||||
* @return A pointer to unistr.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
/* STRENUM_H */
|
||||
#endif
|
||||
@@ -1,190 +0,0 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// stringoptions.h
|
||||
// created: 2017jun08 Markus W. Scherer
|
||||
|
||||
#ifndef __STRINGOPTIONS_H__
|
||||
#define __STRINGOPTIONS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Bit set option bit constants for various string and character processing functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_DEFAULT 0
|
||||
|
||||
/**
|
||||
* Option value for case folding:
|
||||
*
|
||||
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
|
||||
* and dotless i appropriately for Turkic languages (tr, az).
|
||||
*
|
||||
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
|
||||
* are to be included for default mappings and
|
||||
* excluded for the Turkic-specific mappings.
|
||||
*
|
||||
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
|
||||
* are to be excluded for default mappings and
|
||||
* included for the Turkic-specific mappings.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
|
||||
|
||||
/**
|
||||
* Titlecase the string as a whole rather than each word.
|
||||
* (Titlecase only the character at index 0, possibly adjusted.)
|
||||
* Option bits value for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing iterator options together,
|
||||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_WHOLE_STRING 0x20
|
||||
|
||||
/**
|
||||
* Titlecase sentences rather than words.
|
||||
* (Titlecase only the first character of each sentence, possibly adjusted.)
|
||||
* Option bits value for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing iterator options together,
|
||||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_SENTENCES 0x40
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will titlecase the character at each
|
||||
* (possibly adjusted) BreakIterator index and
|
||||
* lowercase all other characters up to the next iterator index.
|
||||
* With this option, the other characters will not be modified.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @see UnicodeString::toTitle
|
||||
* @see CaseMap::toTitle
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_LOWERCASE 0x100
|
||||
|
||||
/**
|
||||
* Do not adjust the titlecasing BreakIterator indexes;
|
||||
* titlecase exactly the characters at breaks from the iterator.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will take each break iterator index,
|
||||
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
|
||||
* and titlecase that one.
|
||||
*
|
||||
* Other characters are lowercased.
|
||||
*
|
||||
* It is an error to specify multiple titlecasing adjustment options together.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @see UnicodeString::toTitle
|
||||
* @see CaseMap::toTitle
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
/**
|
||||
* Adjust each titlecasing BreakIterator index to the next cased character.
|
||||
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* This used to be the default index adjustment in ICU.
|
||||
* Since ICU 60, the default index adjustment is to the next character that is
|
||||
* a letter, number, symbol, or private use code point.
|
||||
* (Uncased modifier letters are skipped.)
|
||||
* The difference in behavior is small for word titlecasing,
|
||||
* but the new adjustment is much better for whole-string and sentence titlecasing:
|
||||
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
|
||||
*
|
||||
* It is an error to specify multiple titlecasing adjustment options together.
|
||||
*
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_ADJUST_TO_CASED 0x400
|
||||
|
||||
/**
|
||||
* Option for string transformation functions to not first reset the Edits object.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_EDITS_NO_RESET 0x2000
|
||||
|
||||
/**
|
||||
* Omit unchanged text when recording how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_CODE_POINT_ORDER 0x8000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Both input strings are assumed to fulfill FCD conditions.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define UNORM_INPUT_IS_FCD 0x20000
|
||||
|
||||
// Related definitions elsewhere.
|
||||
// Options that are not meaningful in the same functions
|
||||
// can share the same bits.
|
||||
//
|
||||
// Public:
|
||||
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
|
||||
//
|
||||
// Internal: (may change or be removed)
|
||||
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
|
||||
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
|
||||
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
|
||||
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
|
||||
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
|
||||
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
|
||||
|
||||
#endif // __STRINGOPTIONS_H__
|
||||
@@ -1,343 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
// Copyright (C) 2009-2013, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2001 and onwards Google Inc.
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
// This code is a contribution of Google code, and the style used here is
|
||||
// a compromise between the original Google code and the ICU coding guidelines.
|
||||
// For example, data types are ICU-ified (size_t,int->int32_t),
|
||||
// and API comments doxygen-ified, but function names and behavior are
|
||||
// as in the original, if possible.
|
||||
// Assertion-style error handling, not available in ICU, was changed to
|
||||
// parameter "pinning" similar to UnicodeString.
|
||||
//
|
||||
// In addition, this is only a partial port of the original Google code,
|
||||
// limited to what was needed so far. The (nearly) complete original code
|
||||
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
|
||||
// (see ICU ticket 6765, r25517).
|
||||
|
||||
#ifndef __STRINGPIECE_H__
|
||||
#define __STRINGPIECE_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: StringPiece: Read-only byte string wrapper class.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/std_string.h"
|
||||
|
||||
// Arghh! I wish C++ literals were "string".
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A string-like object that points to a sized piece of memory.
|
||||
*
|
||||
* We provide non-explicit singleton constructors so users can pass
|
||||
* in a "const char*" or a "string" wherever a "StringPiece" is
|
||||
* expected.
|
||||
*
|
||||
* Functions or methods may use StringPiece parameters to accept either a
|
||||
* "const char*" or a "string" value that will be implicitly converted to a
|
||||
* StringPiece.
|
||||
*
|
||||
* Systematic usage of StringPiece is encouraged as it will reduce unnecessary
|
||||
* conversions from "const char*" to "string" and back again.
|
||||
*
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
class U_COMMON_API StringPiece : public UMemory {
|
||||
private:
|
||||
const char* ptr_;
|
||||
int32_t length_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Default constructor, creates an empty StringPiece.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece() : ptr_(nullptr), length_(0) { }
|
||||
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char * pointer.
|
||||
* @param str a NUL-terminated const char * pointer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* str);
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char8_t * pointer.
|
||||
* @param str a NUL-terminated const char8_t * pointer
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
|
||||
#endif
|
||||
/**
|
||||
* Constructs an empty StringPiece.
|
||||
* Needed for type disambiguation from multiple other overloads.
|
||||
* @param p nullptr
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
|
||||
|
||||
/**
|
||||
* Constructs from a std::string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const std::string& str)
|
||||
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
|
||||
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a std::u8string.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const std::u8string& str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constructs from some other implementation of a string piece class, from any
|
||||
* C++ record type that has these two methods:
|
||||
*
|
||||
* \code{.cpp}
|
||||
*
|
||||
* struct OtherStringPieceClass {
|
||||
* const char* data(); // or const char8_t*
|
||||
* size_t size();
|
||||
* };
|
||||
*
|
||||
* \endcode
|
||||
*
|
||||
* The other string piece class will typically be std::string_view from C++17
|
||||
* or absl::string_view from Abseil.
|
||||
*
|
||||
* Starting with C++20, data() may also return a const char8_t* pointer,
|
||||
* as from std::u8string_view.
|
||||
*
|
||||
* @param str the other string piece
|
||||
* @stable ICU 65
|
||||
*/
|
||||
template <typename T,
|
||||
typename = typename std::enable_if<
|
||||
(std::is_same<decltype(T().data()), const char*>::value
|
||||
#if defined(__cpp_char8_t)
|
||||
|| std::is_same<decltype(T().data()), const char8_t*>::value
|
||||
#endif
|
||||
) &&
|
||||
std::is_same<decltype(T().size()), size_t>::value>::type>
|
||||
StringPiece(T str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) {}
|
||||
|
||||
/**
|
||||
* Constructs from a const char * pointer and a specified length.
|
||||
* @param offset a const char * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a const char8_t * pointer and a specified length.
|
||||
* @param str a const char8_t * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str, int32_t len) :
|
||||
StringPiece(reinterpret_cast<const char*>(str), len) {}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
* @param x the other StringPiece
|
||||
* @param pos start position in x; must be non-negative and <= x.length().
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const StringPiece& x, int32_t pos);
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
* @param x the other StringPiece
|
||||
* @param pos start position in x; must be non-negative and <= x.length().
|
||||
* @param len length of the substring;
|
||||
* must be non-negative and will be pinned to at most x.length() - pos.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const StringPiece& x, int32_t pos, int32_t len);
|
||||
|
||||
/**
|
||||
* Returns the string pointer. May be nullptr if it is empty.
|
||||
*
|
||||
* data() may return a pointer to a buffer with embedded NULs, and the
|
||||
* returned buffer may or may not be null terminated. Therefore it is
|
||||
* typically a mistake to pass data() to a routine that expects a NUL
|
||||
* terminated string.
|
||||
* @return the string pointer
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
const char* data() const { return ptr_; }
|
||||
/**
|
||||
* Returns the string length. Same as length().
|
||||
* @return the string length
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t size() const { return length_; }
|
||||
/**
|
||||
* Returns the string length. Same as size().
|
||||
* @return the string length
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
int32_t length() const { return length_; }
|
||||
/**
|
||||
* Returns whether the string is empty.
|
||||
* @return true if the string is empty
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
UBool empty() const { return length_ == 0; }
|
||||
|
||||
/**
|
||||
* Sets to an empty string.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void clear() { ptr_ = nullptr; length_ = 0; }
|
||||
|
||||
/**
|
||||
* Reset the stringpiece to refer to new data.
|
||||
* @param xdata pointer to the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
|
||||
|
||||
/**
|
||||
* Reset the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
void set(const char* str);
|
||||
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param xdata pointer to the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* xdata, int32_t len) {
|
||||
set(reinterpret_cast<const char*>(xdata), len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* str) {
|
||||
set(reinterpret_cast<const char*>(str));
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Removes the first n string units.
|
||||
* @param n prefix length, must be non-negative and <=length()
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void remove_prefix(int32_t n) {
|
||||
if (n >= 0) {
|
||||
if (n > length_) {
|
||||
n = length_;
|
||||
}
|
||||
ptr_ += n;
|
||||
length_ -= n;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the last n string units.
|
||||
* @param n suffix length, must be non-negative and <=length()
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
void remove_suffix(int32_t n) {
|
||||
if (n >= 0) {
|
||||
if (n <= length_) {
|
||||
length_ -= n;
|
||||
} else {
|
||||
length_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches the StringPiece for the given search string (needle).
|
||||
* @param needle The string for which to search.
|
||||
* @param offset Where to start searching within this string (haystack).
|
||||
* @return The offset of needle in haystack, or -1 if not found.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t find(StringPiece needle, int32_t offset);
|
||||
|
||||
/**
|
||||
* Compares this StringPiece with the other StringPiece, with semantics
|
||||
* similar to std::string::compare().
|
||||
* @param other The string to compare to.
|
||||
* @return below zero if this < other; above zero if this > other; 0 if this == other.
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t compare(StringPiece other);
|
||||
|
||||
/**
|
||||
* Maximum integer, used as a default value for substring methods.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
static const int32_t npos; // = 0x7fffffff;
|
||||
|
||||
/**
|
||||
* Returns a substring of this StringPiece.
|
||||
* @param pos start position; must be non-negative and <= length().
|
||||
* @param len length of the substring;
|
||||
* must be non-negative and will be pinned to at most length() - pos.
|
||||
* @return the substring StringPiece
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece substr(int32_t pos, int32_t len = npos) const {
|
||||
return StringPiece(*this, pos, len);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Global operator == for StringPiece
|
||||
* @param x The first StringPiece to compare.
|
||||
* @param y The second StringPiece to compare.
|
||||
* @return true if the string data is equal
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
U_EXPORT UBool U_EXPORT2
|
||||
operator==(const StringPiece& x, const StringPiece& y);
|
||||
|
||||
/**
|
||||
* Global operator != for StringPiece
|
||||
* @param x The first StringPiece to compare.
|
||||
* @param y The second StringPiece to compare.
|
||||
* @return true if the string data is not equal
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
|
||||
return !(x == y);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif // __STRINGPIECE_H__
|
||||
@@ -1,650 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2015, International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UBRK_H
|
||||
#define UBRK_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* A text-break iterator.
|
||||
* For usage in C programs.
|
||||
*/
|
||||
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
# define UBRK_TYPEDEF_UBREAK_ITERATOR
|
||||
/**
|
||||
* Opaque type representing an ICU Break iterator object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UBreakIterator UBreakIterator;
|
||||
#endif
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: BreakIterator
|
||||
*
|
||||
* <h2> BreakIterator C API </h2>
|
||||
*
|
||||
* The BreakIterator C API defines methods for finding the location
|
||||
* of boundaries in text. UBreakIterator objects maintain a
|
||||
* current position and scan over text returning the index of characters
|
||||
* where boundaries occur.
|
||||
* <p>
|
||||
* Line boundary analysis determines where a text string can be broken
|
||||
* when line-wrapping. The mechanism correctly handles punctuation and
|
||||
* hyphenated words.
|
||||
* <p>
|
||||
* Note: The locale keyword "lb" can be used to modify line break
|
||||
* behavior according to the CSS level 3 line-break options, see
|
||||
* <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
|
||||
* "ja@lb=strict", "zh@lb=loose".
|
||||
* <p>
|
||||
* Sentence boundary analysis allows selection with correct
|
||||
* interpretation of periods within numbers and abbreviations, and
|
||||
* trailing punctuation marks such as quotation marks and parentheses.
|
||||
* <p>
|
||||
* Note: The locale keyword "ss" can be used to enable use of
|
||||
* segmentation suppression data (preventing breaks in English after
|
||||
* abbreviations such as "Mr." or "Est.", for example), as follows:
|
||||
* "en@ss=standard".
|
||||
* <p>
|
||||
* Word boundary analysis is used by search and replace functions, as
|
||||
* well as within text editing applications that allow the user to
|
||||
* select words with a double click. Word selection provides correct
|
||||
* interpretation of punctuation marks within and following
|
||||
* words. Characters that are not part of a word, such as symbols or
|
||||
* punctuation marks, have word-breaks on both sides.
|
||||
* <p>
|
||||
* Character boundary analysis identifies the boundaries of
|
||||
* "Extended Grapheme Clusters", which are groupings of codepoints
|
||||
* that should be treated as character-like units for many text operations.
|
||||
* Please see Unicode Standard Annex #29, Unicode Text Segmentation,
|
||||
* http://www.unicode.org/reports/tr29/ for additional information
|
||||
* on grapheme clusters and guidelines on their use.
|
||||
* <p>
|
||||
* Title boundary analysis locates all positions,
|
||||
* typically starts of words, that should be set to Title Case
|
||||
* when title casing the text.
|
||||
* <p>
|
||||
* The text boundary positions are found according to the rules
|
||||
* described in Unicode Standard Annex #29, Text Boundaries, and
|
||||
* Unicode Standard Annex #14, Line Breaking Properties. These
|
||||
* are available at http://www.unicode.org/reports/tr14/ and
|
||||
* http://www.unicode.org/reports/tr29/.
|
||||
* <p>
|
||||
* In addition to the plain C API defined in this header file, an
|
||||
* object oriented C++ API with equivalent functionality is defined in the
|
||||
* file brkiter.h.
|
||||
* <p>
|
||||
* Code snippets illustrating the use of the Break Iterator APIs
|
||||
* are available in the ICU User Guide,
|
||||
* http://icu-project.org/userguide/boundaryAnalysis.html
|
||||
* and in the sample program icu/source/samples/break/break.cpp
|
||||
*/
|
||||
|
||||
/** The possible types of text boundaries. @stable ICU 2.0 */
|
||||
typedef enum UBreakIteratorType {
|
||||
/** Character breaks @stable ICU 2.0 */
|
||||
UBRK_CHARACTER = 0,
|
||||
/** Word breaks @stable ICU 2.0 */
|
||||
UBRK_WORD = 1,
|
||||
/** Line breaks @stable ICU 2.0 */
|
||||
UBRK_LINE = 2,
|
||||
/** Sentence breaks @stable ICU 2.0 */
|
||||
UBRK_SENTENCE = 3,
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Title Case breaks
|
||||
* The iterator created using this type locates title boundaries as described for
|
||||
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
|
||||
* please use Word Boundary iterator.
|
||||
*
|
||||
* @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
|
||||
*/
|
||||
UBRK_TITLE = 4,
|
||||
/**
|
||||
* One more than the highest normal UBreakIteratorType value.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UBRK_COUNT = 5
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UBreakIteratorType;
|
||||
|
||||
/** Value indicating all text boundaries have been returned.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UBRK_DONE ((int32_t) -1)
|
||||
|
||||
|
||||
/**
|
||||
* Enum constants for the word break tags returned by
|
||||
* getRuleStatus(). A range of values is defined for each category of
|
||||
* word, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
typedef enum UWordBreak {
|
||||
/** Tag value for "words" that do not fit into any of the other categories.
|
||||
* Includes spaces and most punctuation. */
|
||||
UBRK_WORD_NONE = 0,
|
||||
/** Upper bound for tags for uncategorized words. */
|
||||
UBRK_WORD_NONE_LIMIT = 100,
|
||||
/** Tag value for words that appear to be numbers, lower limit. */
|
||||
UBRK_WORD_NUMBER = 100,
|
||||
/** Tag value for words that appear to be numbers, upper limit. */
|
||||
UBRK_WORD_NUMBER_LIMIT = 200,
|
||||
/** Tag value for words that contain letters, excluding
|
||||
* hiragana, katakana or ideographic characters, lower limit. */
|
||||
UBRK_WORD_LETTER = 200,
|
||||
/** Tag value for words containing letters, upper limit */
|
||||
UBRK_WORD_LETTER_LIMIT = 300,
|
||||
/** Tag value for words containing kana characters, lower limit */
|
||||
UBRK_WORD_KANA = 300,
|
||||
/** Tag value for words containing kana characters, upper limit */
|
||||
UBRK_WORD_KANA_LIMIT = 400,
|
||||
/** Tag value for words containing ideographic characters, lower limit */
|
||||
UBRK_WORD_IDEO = 400,
|
||||
/** Tag value for words containing ideographic characters, upper limit */
|
||||
UBRK_WORD_IDEO_LIMIT = 500
|
||||
} UWordBreak;
|
||||
|
||||
/**
|
||||
* Enum constants for the line break tags returned by getRuleStatus().
|
||||
* A range of values is defined for each category of
|
||||
* line break, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum ULineBreakTag {
|
||||
/** Tag value for soft line breaks, positions at which a line break
|
||||
* is acceptable but not required */
|
||||
UBRK_LINE_SOFT = 0,
|
||||
/** Upper bound for soft line breaks. */
|
||||
UBRK_LINE_SOFT_LIMIT = 100,
|
||||
/** Tag value for a hard, or mandatory line break */
|
||||
UBRK_LINE_HARD = 100,
|
||||
/** Upper bound for hard line breaks. */
|
||||
UBRK_LINE_HARD_LIMIT = 200
|
||||
} ULineBreakTag;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Enum constants for the sentence break tags returned by getRuleStatus().
|
||||
* A range of values is defined for each category of
|
||||
* sentence, to allow for further subdivisions of a category in future releases.
|
||||
* Applications should check for tag values falling within the range, rather
|
||||
* than for single individual values.
|
||||
*
|
||||
* The numeric values of all of these constants are stable (will not change).
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
typedef enum USentenceBreakTag {
|
||||
/** Tag value for sentences ending with a sentence terminator
|
||||
* ('.', '?', '!', etc.) character, possibly followed by a
|
||||
* hard separator (CR, LF, PS, etc.)
|
||||
*/
|
||||
UBRK_SENTENCE_TERM = 0,
|
||||
/** Upper bound for tags for sentences ended by sentence terminators. */
|
||||
UBRK_SENTENCE_TERM_LIMIT = 100,
|
||||
/** Tag value for sentences that do not contain an ending
|
||||
* sentence terminator ('.', '?', '!', etc.) character, but
|
||||
* are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
|
||||
*/
|
||||
UBRK_SENTENCE_SEP = 100,
|
||||
/** Upper bound for tags for sentences ended by a separator. */
|
||||
UBRK_SENTENCE_SEP_LIMIT = 200
|
||||
/** Tag value for a hard, or mandatory line break */
|
||||
} USentenceBreakTag;
|
||||
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries for a specified locale.
|
||||
* A UBreakIterator may be used for detecting character, line, word,
|
||||
* and sentence breaks in text.
|
||||
* @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
|
||||
* UBRK_LINE, UBRK_SENTENCE
|
||||
* @param locale The locale specifying the text-breaking conventions. Note that
|
||||
* locale keys such as "lb" and "ss" may be used to modify text break behavior,
|
||||
* see general discussion of BreakIterator C API.
|
||||
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
|
||||
* used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param status A UErrorCode to receive any errors.
|
||||
* @return A UBreakIterator for the specified locale.
|
||||
* @see ubrk_openRules
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_open(UBreakIteratorType type,
|
||||
const char *locale,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries using specified breaking rules.
|
||||
* The rule syntax is ... (TBD)
|
||||
* @param rules A set of rules specifying the text breaking conventions.
|
||||
* @param rulesLength The number of characters in rules, or -1 if null-terminated.
|
||||
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
|
||||
* used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param parseErr Receives position and context information for any syntax errors
|
||||
* detected while parsing the rules.
|
||||
* @param status A UErrorCode to receive any errors.
|
||||
* @return A UBreakIterator for the specified rules.
|
||||
* @see ubrk_open
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_openRules(const UChar *rules,
|
||||
int32_t rulesLength,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UParseError *parseErr,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
|
||||
* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
|
||||
* Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
|
||||
* compatible across different major versions of ICU, nor across platforms of different
|
||||
* endianness or different base character set family (ASCII vs EBCDIC).
|
||||
* @param binaryRules A set of compiled binary rules specifying the text breaking
|
||||
* conventions. Ownership of the storage containing the compiled
|
||||
* rules remains with the caller of this function. The compiled
|
||||
* rules must not be modified or deleted during the life of the
|
||||
* break iterator.
|
||||
* @param rulesLength The length of binaryRules in bytes; must be >= 0.
|
||||
* @param text The text to be iterated over. May be null, in which case
|
||||
* ubrk_setText() is used to specify the text to be iterated.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated.
|
||||
* @param status Pointer to UErrorCode to receive any errors.
|
||||
* @return UBreakIterator for the specified rules.
|
||||
* @see ubrk_getBinaryRules
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_CAPI UBreakIterator* U_EXPORT2
|
||||
ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
||||
const UChar * text, int32_t textLength,
|
||||
UErrorCode * status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation
|
||||
* @param bi iterator to be cloned
|
||||
* @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
|
||||
* user allocated space for the new clone. If NULL new memory will be allocated.
|
||||
* If buffer is not large enough, new memory will be allocated.
|
||||
* Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
|
||||
* @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
|
||||
* pointer to size of allocated space.
|
||||
* If *pBufferSize == 0, a sufficient size for use in cloning will
|
||||
* be returned ('pre-flighting')
|
||||
* If *pBufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used if any allocations were necessary.
|
||||
* @return pointer to the new clone
|
||||
* @deprecated ICU 69 Use ubrk_clone() instead.
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_safeClone(
|
||||
const UBreakIterator *bi,
|
||||
void *stackBuffer,
|
||||
int32_t *pBufferSize,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation.
|
||||
* @param bi iterator to be cloned
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* @return pointer to the new clone
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
|
||||
* @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
|
||||
*/
|
||||
#define U_BRK_SAFECLONE_BUFFERSIZE 1
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
/**
|
||||
* Close a UBreakIterator.
|
||||
* Once closed, a UBreakIterator may no longer be used.
|
||||
* @param bi The break iterator to close.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_close(UBreakIterator *bi);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUBreakIteratorPointer
|
||||
* "Smart pointer" class, closes a UBreakIterator via ubrk_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Sets an existing iterator to point to a new piece of text.
|
||||
* The break iterator retains a pointer to the supplied text.
|
||||
* The caller must not modify or delete the text while the BreakIterator
|
||||
* retains the reference.
|
||||
*
|
||||
* @param bi The iterator to use
|
||||
* @param text The text to be set
|
||||
* @param textLength The length of the text
|
||||
* @param status The error code
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_setText(UBreakIterator* bi,
|
||||
const UChar* text,
|
||||
int32_t textLength,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* Sets an existing iterator to point to a new piece of text.
|
||||
*
|
||||
* All index positions returned by break iterator functions are
|
||||
* native indices from the UText. For example, when breaking UTF-8
|
||||
* encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
|
||||
* will be UTF-8 string indices, not UTF-16 positions.
|
||||
*
|
||||
* @param bi The iterator to use
|
||||
* @param text The text to be set.
|
||||
* This function makes a shallow clone of the supplied UText. This means
|
||||
* that the caller is free to immediately close or otherwise reuse the
|
||||
* UText that was passed as a parameter, but that the underlying text itself
|
||||
* must not be altered while being referenced by the break iterator.
|
||||
* @param status The error code
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_setUText(UBreakIterator* bi,
|
||||
UText* text,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Determine the most recently-returned text boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
|
||||
* \ref ubrk_first, or \ref ubrk_last.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_current(const UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Advance the iterator to the boundary following the current boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index of the next text boundary, or UBRK_DONE
|
||||
* if all text boundaries have been returned.
|
||||
* @see ubrk_previous
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_next(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the boundary preceding the current boundary.
|
||||
*
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character index of the preceding text boundary, or UBRK_DONE
|
||||
* if all text boundaries have been returned.
|
||||
* @see ubrk_next
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_previous(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to zero, the start of the text being scanned.
|
||||
* @param bi The break iterator to use.
|
||||
* @return The new iterator position (zero).
|
||||
* @see ubrk_last
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_first(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
|
||||
* This is not the same as the last character.
|
||||
* @param bi The break iterator to use.
|
||||
* @return The character offset immediately <EM>beyond</EM> the last character in the
|
||||
* text being scanned.
|
||||
* @see ubrk_first
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_last(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Set the iterator position to the first boundary preceding the specified offset.
|
||||
* The new position is always smaller than offset, or UBRK_DONE.
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset The offset to begin scanning.
|
||||
* @return The text boundary preceding offset, or UBRK_DONE.
|
||||
* @see ubrk_following
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_preceding(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Advance the iterator to the first boundary following the specified offset.
|
||||
* The value returned is always greater than offset, or UBRK_DONE.
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset The offset to begin scanning.
|
||||
* @return The text boundary following offset, or UBRK_DONE.
|
||||
* @see ubrk_preceding
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_following(UBreakIterator *bi,
|
||||
int32_t offset);
|
||||
|
||||
/**
|
||||
* Get a locale for which text breaking information is available.
|
||||
* A UBreakIterator in a locale returned by this function will perform the correct
|
||||
* text breaking for the locale.
|
||||
* @param index The index of the desired locale.
|
||||
* @return A locale for which text breaking information is available, or 0 if none.
|
||||
* @see ubrk_countAvailable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ubrk_getAvailable(int32_t index);
|
||||
|
||||
/**
|
||||
* Determine how many locales have text breaking information available.
|
||||
* This function is most useful for determining the loop ending condition for
|
||||
* calls to \ref ubrk_getAvailable.
|
||||
* @return The number of locales for which text breaking information is available.
|
||||
* @see ubrk_getAvailable
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_countAvailable(void);
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if the specified position is a boundary position. As a side
|
||||
* effect, leaves the iterator pointing to the first boundary position at
|
||||
* or after "offset".
|
||||
* @param bi The break iterator to use.
|
||||
* @param offset the offset to check.
|
||||
* @return True if "offset" is a boundary position.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
|
||||
|
||||
/**
|
||||
* Return the status from the break rule that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. For rules that do not specify a
|
||||
* status, a default value of 0 is returned.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatus(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Get the statuses from the break rules that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. The default status value for rules
|
||||
* that do not explicitly provide one is zero.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @param bi The break iterator to use
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Return the locale of the break iterator. You can choose between the valid and
|
||||
* the actual locale.
|
||||
* @param bi break iterator
|
||||
* @param type locale type (valid or actual)
|
||||
* @param status error code
|
||||
* @return locale string
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized
|
||||
* system-level code. One example use case is with garbage collection
|
||||
* that moves the text in memory.
|
||||
*
|
||||
* @param bi The break iterator.
|
||||
* @param text The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
*
|
||||
* @stable ICU 49
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubrk_refreshUText(UBreakIterator *bi,
|
||||
UText *text,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
|
||||
* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
|
||||
* more quickly than using ubrk_openRules. The compiled rules are not compatible across
|
||||
* different major versions of ICU, nor across platforms of different endianness or
|
||||
* different base character set family (ASCII vs EBCDIC). Supports preflighting (with
|
||||
* binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
|
||||
* the binaryRules buffer. However, whether preflighting or not, if the actual length
|
||||
* is greater than INT32_MAX, then the function returns 0 and sets *status to
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR.
|
||||
|
||||
* @param bi The break iterator to use.
|
||||
* @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
|
||||
* preflighting.
|
||||
* @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
|
||||
* preflighting. Must be >= 0.
|
||||
* @param status Pointer to UErrorCode to receive any errors, such as
|
||||
* U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @return The actual byte length of the binary rules, if <= INT32_MAX;
|
||||
* otherwise 0. If not preflighting and this is larger than
|
||||
* rulesCapacity, *status will be set to an error.
|
||||
* @see ubrk_openBinaryRules
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getBinaryRules(UBreakIterator *bi,
|
||||
uint8_t * binaryRules, int32_t rulesCapacity,
|
||||
UErrorCode * status);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,465 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
*
|
||||
* ucnv_err.h:
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C UConverter predefined error callbacks
|
||||
*
|
||||
* <h2>Error Behaviour Functions</h2>
|
||||
* Defines some error behaviour functions called by ucnv_{from,to}Unicode
|
||||
* These are provided as part of ICU and many are stable, but they
|
||||
* can also be considered only as an example of what can be done with
|
||||
* callbacks. You may of course write your own.
|
||||
*
|
||||
* If you want to write your own, you may also find the functions from
|
||||
* ucnv_cb.h useful when writing your own callbacks.
|
||||
*
|
||||
* These functions, although public, should NEVER be called directly.
|
||||
* They should be used as parameters to the ucnv_setFromUCallBack
|
||||
* and ucnv_setToUCallBack functions, to set the behaviour of a converter
|
||||
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
|
||||
*
|
||||
* usage example: 'STOP' doesn't need any context, but newContext
|
||||
* could be set to something other than 'NULL' if needed. The available
|
||||
* contexts in this header can modify the default behavior of the callback.
|
||||
*
|
||||
* \code
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter *myConverter = ucnv_open("ibm-949", &err);
|
||||
* const void *oldContext;
|
||||
* UConverterFromUCallback oldAction;
|
||||
*
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* ucnv_setFromUCallBack(myConverter,
|
||||
* UCNV_FROM_U_CALLBACK_STOP,
|
||||
* NULL,
|
||||
* &oldAction,
|
||||
* &oldContext,
|
||||
* &err);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters an
|
||||
* ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
|
||||
* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
|
||||
* and ucnv_setToUCallBack would need to be called in order to change
|
||||
* that behavior too.
|
||||
*
|
||||
* Here is an example with a context:
|
||||
*
|
||||
* \code
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter *myConverter = ucnv_open("ibm-949", &err);
|
||||
* const void *oldContext;
|
||||
* UConverterFromUCallback oldAction;
|
||||
*
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* ucnv_setToUCallBack(myConverter,
|
||||
* UCNV_TO_U_CALLBACK_SUBSTITUTE,
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL,
|
||||
* &oldAction,
|
||||
* &oldContext,
|
||||
* &err);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters an
|
||||
* ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
|
||||
* Codepage -> Unicode. Any unmapped and legal characters will be
|
||||
* substituted to be the default substitution character.
|
||||
*/
|
||||
|
||||
#ifndef UCNV_ERR_H
|
||||
#define UCNV_ERR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
/** Forward declaring the UConverter structure. @stable ICU 2.0 */
|
||||
struct UConverter;
|
||||
|
||||
/** @stable ICU 2.0 */
|
||||
typedef struct UConverter UConverter;
|
||||
|
||||
/**
|
||||
* FROM_U, TO_U context options for sub callback
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_SUB_STOP_ON_ILLEGAL "i"
|
||||
|
||||
/**
|
||||
* FROM_U, TO_U context options for skip callback
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
|
||||
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_ICU NULL
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_JAVA "J"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
|
||||
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_C "C"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_DEC "D"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_HEX "X"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_UNICODE "U"
|
||||
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
|
||||
* a backslash, 1..6 hex digits, and a space)
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_CSS2 "S"
|
||||
|
||||
/**
|
||||
* The process condition code to be used with the callbacks.
|
||||
* Codes which are greater than UCNV_IRREGULAR should be
|
||||
* passed on to any chained callbacks.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum {
|
||||
UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
|
||||
The error code U_INVALID_CHAR_FOUND will be set. */
|
||||
UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
|
||||
\\x81\\x2E is illegal in SJIS because \\x2E
|
||||
is not a valid trail byte for the \\x81
|
||||
lead byte.
|
||||
Also, starting with Unicode 3.0.1, non-shortest byte sequences
|
||||
in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
|
||||
are also illegal, not just irregular.
|
||||
The error code U_ILLEGAL_CHAR_FOUND will be set. */
|
||||
UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
|
||||
the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
|
||||
are irregular UTF-8 byte sequences for single surrogate
|
||||
code points.
|
||||
The error code U_INVALID_CHAR_FOUND will be set. */
|
||||
UCNV_RESET = 3, /**< The callback is called with this reason when a
|
||||
'reset' has occurred. Callback should reset all
|
||||
state. */
|
||||
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
|
||||
callback should release any allocated memory.*/
|
||||
UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
|
||||
converter. The pointer available as the
|
||||
'context' is an alias to the original converter's
|
||||
context pointer. If the context must be owned
|
||||
by the new converter, the callback must clone
|
||||
the data and call ucnv_setFromUCallback
|
||||
(or setToUCallback) with the correct pointer.
|
||||
@stable ICU 2.2
|
||||
*/
|
||||
} UConverterCallbackReason;
|
||||
|
||||
|
||||
/**
|
||||
* The structure for the fromUnicode callback function parameter.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
|
||||
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
|
||||
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
|
||||
const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
|
||||
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
} UConverterFromUnicodeArgs;
|
||||
|
||||
|
||||
/**
|
||||
* The structure for the toUnicode callback function parameter.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
|
||||
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
|
||||
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
|
||||
const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
|
||||
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
} UConverterToUnicodeArgs;
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
*
|
||||
* @param context Pointer to the callback's private data
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err This should always be set to a failure status prior to calling.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
*
|
||||
* @param context Pointer to the callback's private data
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err This should always be set to a failure status prior to calling.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
|
||||
* skips only UNASSIGNED_SEQUENCE depending on the context parameter
|
||||
* simply ignoring those characters.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Skips any ILLEGAL_SEQUENCE
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
|
||||
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
||||
* current substitution string for the converter. This is the default
|
||||
* callback.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @see ucnv_setSubstChars
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
||||
* hexadecimal representation of the illegal codepoints
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* <ul>
|
||||
* <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format %UXXXX, e.g. "%UFFFE%U00AC%UC8FE").
|
||||
* In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* %UD84D%UDC56</li>
|
||||
* <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
|
||||
* In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \\uD84D\\uDC56</li>
|
||||
* <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
|
||||
* In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \\U00023456</li>
|
||||
* <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
|
||||
* representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly.
|
||||
* In the Event the converter doesn't support the characters {&,#}[0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* &#144470; and Zero padding is ignored.</li>
|
||||
* <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
|
||||
* representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly.
|
||||
* In the Event the converter doesn't support the characters {&,#,x}[0-9],
|
||||
* it will substitute the illegal sequence with the substitution characters.
|
||||
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
||||
* \htmlonly&#x23456;\endhtmlonly</li>
|
||||
* </ul>
|
||||
* @param fromUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
|
||||
const void *context,
|
||||
UConverterFromUnicodeArgs *fromUArgs,
|
||||
const UChar* codeUnits,
|
||||
int32_t length,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
|
||||
* skips only UNASSIGNED_SEQUENCE depending on the context parameter
|
||||
* simply ignoring those characters.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Skips any ILLEGAL_SEQUENCE
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
|
||||
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
||||
* Unicode substitution character, U+FFFD.
|
||||
*
|
||||
* @param context The function currently recognizes the callback options:
|
||||
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
||||
* returning the error code back to the caller immediately.
|
||||
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
||||
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
||||
* hexadecimal representation of the illegal bytes
|
||||
* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
|
||||
*
|
||||
* @param context This function currently recognizes the callback options:
|
||||
* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
|
||||
* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
|
||||
* @param toUArgs Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
|
||||
* @param length Size (in bytes) of the concerned codepage sequence
|
||||
* @param reason Defines the reason the callback was invoked
|
||||
* @param err Return value will be set to success if the callback was handled,
|
||||
* otherwise this value will be set to a failure status.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
|
||||
const void *context,
|
||||
UConverterToUnicodeArgs *toUArgs,
|
||||
const char* codeUnits,
|
||||
int32_t length,
|
||||
UConverterCallbackReason reason,
|
||||
UErrorCode * err);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*UCNV_ERR_H*/
|
||||
@@ -1,456 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uconfig.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002sep19
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCONFIG_H__
|
||||
#define __UCONFIG_H__
|
||||
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief User-configurable settings
|
||||
*
|
||||
* Miscellaneous switches:
|
||||
*
|
||||
* A number of macros affect a variety of minor aspects of ICU.
|
||||
* Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
|
||||
* and moved here to make them easier to find.
|
||||
*
|
||||
* Switches for excluding parts of ICU library code modules:
|
||||
*
|
||||
* Changing these macros allows building partial, smaller libraries for special purposes.
|
||||
* By default, all modules are built.
|
||||
* The switches are fairly coarse, controlling large modules.
|
||||
* Basic services cannot be turned off.
|
||||
*
|
||||
* Building with any of these options does not guarantee that the
|
||||
* ICU build process will completely work. It is recommended that
|
||||
* the ICU libraries and data be built using the normal build.
|
||||
* At that time you should remove the data used by those services.
|
||||
* After building the ICU data library, you should rebuild the ICU
|
||||
* libraries with these switches customized to your needs.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
|
||||
* prior to determining default settings for uconfig variables.
|
||||
*
|
||||
* @internal ICU 4.0
|
||||
*/
|
||||
#if defined(UCONFIG_USE_LOCAL)
|
||||
#include "uconfig_local.h"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DEBUG
|
||||
* Determines whether to include debugging code.
|
||||
* Automatically set on Windows, but most compilers do not have
|
||||
* related predefined macros.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_DEBUG
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_DEBUG)
|
||||
/*
|
||||
* _DEBUG is defined by Visual Studio debug compilation.
|
||||
* Do *not* test for its NDEBUG macro: It is an orthogonal macro
|
||||
* which disables assert().
|
||||
*/
|
||||
# define U_DEBUG 1
|
||||
# else
|
||||
# define U_DEBUG 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Determines whether to enable auto cleanup of libraries.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCLN_NO_AUTO_CLEANUP
|
||||
#define UCLN_NO_AUTO_CLEANUP 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DISABLE_RENAMING
|
||||
* Determines whether to disable renaming or not.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DISABLE_RENAMING
|
||||
#define U_DISABLE_RENAMING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
* Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
|
||||
* utypes.h includes those headers if this macro is defined to 0.
|
||||
* Otherwise, each of those headers must be included explicitly when using one of their macros.
|
||||
* Defaults to 0 for backward compatibility, except inside ICU.
|
||||
* @stable ICU 49
|
||||
*/
|
||||
#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
|
||||
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
|
||||
defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
|
||||
#else
|
||||
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_OVERRIDE_CXX_ALLOCATION
|
||||
* Determines whether to override new and delete.
|
||||
* ICU is normally built such that all of its C++ classes, via their UMemory base,
|
||||
* override operators new and delete to use its internal, customizable,
|
||||
* non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
|
||||
*
|
||||
* This is especially important when the application and its libraries use multiple heaps.
|
||||
* For example, on Windows, this allows the ICU DLL to be used by
|
||||
* applications that statically link the C Runtime library.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#ifndef U_OVERRIDE_CXX_ALLOCATION
|
||||
#define U_OVERRIDE_CXX_ALLOCATION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ENABLE_TRACING
|
||||
* Determines whether to enable tracing.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_ENABLE_TRACING
|
||||
#define U_ENABLE_TRACING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ENABLE_PLUGINS
|
||||
* Determines whether to enable ICU plugins.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_ENABLE_PLUGINS
|
||||
#define UCONFIG_ENABLE_PLUGINS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_ENABLE_DYLOAD
|
||||
* Whether to enable Dynamic loading in ICU.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_ENABLE_DYLOAD
|
||||
#define U_ENABLE_DYLOAD 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_CHECK_DYLOAD
|
||||
* Whether to test Dynamic loading as an OS capability.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_CHECK_DYLOAD
|
||||
#define U_CHECK_DYLOAD 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_DEFAULT_SHOW_DRAFT
|
||||
* Do we allow ICU users to use the draft APIs by default?
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DEFAULT_SHOW_DRAFT
|
||||
#define U_DEFAULT_SHOW_DRAFT 1
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Custom icu entry point renaming */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_HAVE_LIB_SUFFIX
|
||||
* 1 if a custom library suffix is set.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_LIB_SUFFIX
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
|
||||
# define U_HAVE_LIB_SUFFIX 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_LIB_SUFFIX_C_NAME_STRING
|
||||
* Defines the library suffix as a string with C syntax.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_LIB_SUFFIX_C_NAME_STRING
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_LIB_SUFFIX_C_NAME)
|
||||
# define CONVERT_TO_STRING(s) #s
|
||||
# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME)
|
||||
#else
|
||||
# define U_LIB_SUFFIX_C_NAME_STRING ""
|
||||
#endif
|
||||
|
||||
/* common/i18n library switches --------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ONLY_COLLATION
|
||||
* This switch turns off modules that are not needed for collation.
|
||||
*
|
||||
* It does not turn off legacy conversion because that is necessary
|
||||
* for ICU to work on EBCDIC platforms (for the default converter).
|
||||
* If you want "only collation" and do not build for EBCDIC,
|
||||
* then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_ONLY_COLLATION
|
||||
# define UCONFIG_ONLY_COLLATION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_ONLY_COLLATION
|
||||
/* common library */
|
||||
# define UCONFIG_NO_BREAK_ITERATION 1
|
||||
# define UCONFIG_NO_IDNA 1
|
||||
|
||||
/* i18n library */
|
||||
# if UCONFIG_NO_COLLATION
|
||||
# error Contradictory collation switches in uconfig.h.
|
||||
# endif
|
||||
# define UCONFIG_NO_FORMATTING 1
|
||||
# define UCONFIG_NO_TRANSLITERATION 1
|
||||
# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
|
||||
#endif
|
||||
|
||||
/* common library switches -------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FILE_IO
|
||||
* This switch turns off all file access in the common library
|
||||
* where file access is only used for data loading.
|
||||
* ICU data must then be provided in the form of a data DLL (or with an
|
||||
* equivalent way to link to the data residing in an executable,
|
||||
* as in building a combined library with both the common library's code and
|
||||
* the data), or via udata_setCommonData().
|
||||
* Application data must be provided via udata_setAppData() or by using
|
||||
* "open" functions that take pointers to data, for example ucol_openBinary().
|
||||
*
|
||||
* File access is not used at all in the i18n library.
|
||||
*
|
||||
* File access cannot be turned off for the icuio library or for the ICU
|
||||
* test suites and ICU tools.
|
||||
*
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FILE_IO
|
||||
# define UCONFIG_NO_FILE_IO 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR)
|
||||
# error Contradictory file io switches in uconfig.h.
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_CONVERSION
|
||||
* ICU will not completely build (compiling the tools fails) with this
|
||||
* switch turned on.
|
||||
* This switch turns off all converters.
|
||||
*
|
||||
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
|
||||
* in utypes.h if char* strings in your environment are always in UTF-8.
|
||||
*
|
||||
* @stable ICU 3.2
|
||||
* @see U_CHARSET_IS_UTF8
|
||||
*/
|
||||
#ifndef UCONFIG_NO_CONVERSION
|
||||
# define UCONFIG_NO_CONVERSION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_CONVERSION
|
||||
# define UCONFIG_NO_LEGACY_CONVERSION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ONLY_HTML_CONVERSION
|
||||
* This switch turns off all of the converters NOT listed in
|
||||
* the HTML encoding standard:
|
||||
* http://www.w3.org/TR/encoding/#names-and-labels
|
||||
*
|
||||
* This is not possible on EBCDIC platforms
|
||||
* because they need ibm-37 or ibm-1047 default converters.
|
||||
*
|
||||
* @stable ICU 55
|
||||
*/
|
||||
#ifndef UCONFIG_ONLY_HTML_CONVERSION
|
||||
# define UCONFIG_ONLY_HTML_CONVERSION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_LEGACY_CONVERSION
|
||||
* This switch turns off all converters except for
|
||||
* - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
|
||||
* - US-ASCII
|
||||
* - ISO-8859-1
|
||||
*
|
||||
* Turning off legacy conversion is not possible on EBCDIC platforms
|
||||
* because they need ibm-37 or ibm-1047 default converters.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_LEGACY_CONVERSION
|
||||
# define UCONFIG_NO_LEGACY_CONVERSION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_NORMALIZATION
|
||||
* This switch turns off normalization.
|
||||
* It implies turning off several other services as well, for example
|
||||
* collation and IDNA.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_NORMALIZATION
|
||||
# define UCONFIG_NO_NORMALIZATION 0
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
/* common library */
|
||||
/* ICU 50 CJK dictionary BreakIterator uses normalization */
|
||||
# define UCONFIG_NO_BREAK_ITERATION 1
|
||||
/* IDNA (UTS #46) is implemented via normalization */
|
||||
# define UCONFIG_NO_IDNA 1
|
||||
|
||||
/* i18n library */
|
||||
# if UCONFIG_ONLY_COLLATION
|
||||
# error Contradictory collation switches in uconfig.h.
|
||||
# endif
|
||||
# define UCONFIG_NO_COLLATION 1
|
||||
# define UCONFIG_NO_TRANSLITERATION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_BREAK_ITERATION
|
||||
* This switch turns off break iteration.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_BREAK_ITERATION
|
||||
# define UCONFIG_NO_BREAK_ITERATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_IDNA
|
||||
* This switch turns off IDNA.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#ifndef UCONFIG_NO_IDNA
|
||||
# define UCONFIG_NO_IDNA 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
* Determines the default UMessagePatternApostropheMode.
|
||||
* See the documentation for that enum.
|
||||
*
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API
|
||||
* On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines
|
||||
* if the Windows platform APIs are used for LCID<->Locale Name conversions.
|
||||
* Otherwise, only the built-in ICU tables are used.
|
||||
*
|
||||
* @internal ICU 64
|
||||
*/
|
||||
#ifndef UCONFIG_USE_WINDOWS_LCID_MAPPING_API
|
||||
# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1
|
||||
#endif
|
||||
|
||||
/* i18n library switches ---------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_COLLATION
|
||||
* This switch turns off collation and collation-based string search.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_COLLATION
|
||||
# define UCONFIG_NO_COLLATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FORMATTING
|
||||
* This switch turns off formatting and calendar/timezone services.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FORMATTING
|
||||
# define UCONFIG_NO_FORMATTING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_TRANSLITERATION
|
||||
* This switch turns off transliteration.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_TRANSLITERATION
|
||||
# define UCONFIG_NO_TRANSLITERATION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
* This switch turns off regular expressions.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_SERVICE
|
||||
* This switch turns off service registration.
|
||||
*
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
#ifndef UCONFIG_NO_SERVICE
|
||||
# define UCONFIG_NO_SERVICE 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_HAVE_PARSEALLINPUT
|
||||
* This switch turns on the "parse all input" attribute. Binary incompatible.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_HAVE_PARSEALLINPUT
|
||||
# define UCONFIG_HAVE_PARSEALLINPUT 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
* This switch turns off filtered break iteration code.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
|
||||
#endif
|
||||
|
||||
#endif // __UCONFIG_H__
|
||||
@@ -1,159 +0,0 @@
|
||||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucpmap.h
|
||||
// created: 2018sep03 Markus W. Scherer
|
||||
|
||||
#ifndef __UCPMAP_H__
|
||||
#define __UCPMAP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* This file defines an abstract map from Unicode code points to integer values.
|
||||
*
|
||||
* @see UCPMap
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
*/
|
||||
|
||||
/**
|
||||
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef struct UCPMap UCPMap;
|
||||
|
||||
/**
|
||||
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
|
||||
* Most users should use UCPMAP_RANGE_NORMAL.
|
||||
*
|
||||
* @see ucpmap_getRange
|
||||
* @see ucptrie_getRange
|
||||
* @see umutablecptrie_getRange
|
||||
* @stable ICU 63
|
||||
*/
|
||||
enum UCPMapRangeOption {
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
|
||||
* Most users should use this option.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_NORMAL,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_LEAD(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the lead surrogate code *points*.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that all surrogates (U+D800..U+DFFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_SURROGATE(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the surrogate code *points*.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_ALL_SURROGATES
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPMapRangeOption UCPMapRangeOption;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the map, with range checking.
|
||||
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* @param map the map
|
||||
* @param c the code point
|
||||
* @return the map value,
|
||||
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucpmap_get(const UCPMap *map, UChar32 c);
|
||||
|
||||
/**
|
||||
* Callback function type: Modifies a map value.
|
||||
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
|
||||
* The modified value will be returned by the getRange function.
|
||||
*
|
||||
* Can be used to ignore some of the value bits,
|
||||
* make a filter for one of several values,
|
||||
* return a value index computed from the map value, etc.
|
||||
*
|
||||
* @param context an opaque pointer, as passed into the getRange function
|
||||
* @param value a value from the map
|
||||
* @return the modified value
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
UCPMapValueFilter(const void *context, uint32_t value);
|
||||
|
||||
/**
|
||||
* Returns the last code point such that all those from start to there have the same value.
|
||||
* Can be used to efficiently iterate over all same-value ranges in a map.
|
||||
* (This is normally faster than iterating over code points and get()ting each value,
|
||||
* but much slower than a data structure that stores ranges directly.)
|
||||
*
|
||||
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||
* the value to be delivered is passed through that function, and the return value is the end
|
||||
* of the range where all values are modified to the same actual value.
|
||||
* The value is unchanged if that function pointer is NULL.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* UChar32 start = 0, end;
|
||||
* uint32_t value;
|
||||
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
* NULL, NULL, &value)) >= 0) {
|
||||
* // Work with the range start..end and its value.
|
||||
* start = end + 1;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param map the map
|
||||
* @param start range start
|
||||
* @param option defines whether surrogates are treated normally,
|
||||
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||
* @param filter a pointer to a function that may modify the map data value,
|
||||
* or NULL if the values from the map are to be used unmodified
|
||||
* @param context an opaque pointer that is passed on to the filter function
|
||||
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||
* may have been modified by filter(context, map value)
|
||||
* if that function pointer is not NULL
|
||||
* @return the range end code point, or -1 if start is not a valid code point
|
||||
* @stable ICU 63
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucpmap_getRange(const UCPMap *map, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
@@ -1,209 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uenum.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:2
|
||||
*
|
||||
* created on: 2002jul08
|
||||
* created by: Vladimir Weinstein
|
||||
*/
|
||||
|
||||
#ifndef __UENUM_H
|
||||
#define __UENUM_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/localpointer.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
class StringEnumeration;
|
||||
U_NAMESPACE_END
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: String Enumeration
|
||||
*/
|
||||
|
||||
/**
|
||||
* An enumeration object.
|
||||
* For usage in C programs.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
struct UEnumeration;
|
||||
/** structure representing an enumeration object instance @stable ICU 2.2 */
|
||||
typedef struct UEnumeration UEnumeration;
|
||||
|
||||
/**
|
||||
* Disposes of resources in use by the iterator. If en is NULL,
|
||||
* does nothing. After this call, any char* or UChar* pointer
|
||||
* returned by uenum_unext() or uenum_next() is invalid.
|
||||
* @param en UEnumeration structure pointer
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uenum_close(UEnumeration* en);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUEnumerationPointer
|
||||
* "Smart pointer" class, closes a UEnumeration via uenum_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the number of elements that the iterator traverses. If
|
||||
* the iterator is out-of-sync with its service, status is set to
|
||||
* U_ENUM_OUT_OF_SYNC_ERROR.
|
||||
* This is a convenience function. It can end up being very
|
||||
* expensive as all the items might have to be pre-fetched (depending
|
||||
* on the type of data being traversed). Use with caution and only
|
||||
* when necessary.
|
||||
* @param en UEnumeration structure pointer
|
||||
* @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
|
||||
* iterator is out of sync.
|
||||
* @return number of elements in the iterator
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uenum_count(UEnumeration* en, UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the next element in the iterator's list. If there are
|
||||
* no more elements, returns NULL. If the iterator is out-of-sync
|
||||
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
|
||||
* NULL is returned. If the native service string is a char* string,
|
||||
* it is converted to UChar* with the invariant converter.
|
||||
* The result is terminated by (UChar)0.
|
||||
* @param en the iterator object
|
||||
* @param resultLength pointer to receive the length of the result
|
||||
* (not including the terminating \\0).
|
||||
* If the pointer is NULL it is ignored.
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service.
|
||||
* @return a pointer to the string. The string will be
|
||||
* zero-terminated. The return pointer is owned by this iterator
|
||||
* and must not be deleted by the caller. The pointer is valid
|
||||
* until the next call to any uenum_... method, including
|
||||
* uenum_next() or uenum_unext(). When all strings have been
|
||||
* traversed, returns NULL.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
uenum_unext(UEnumeration* en,
|
||||
int32_t* resultLength,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the next element in the iterator's list. If there are
|
||||
* no more elements, returns NULL. If the iterator is out-of-sync
|
||||
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
|
||||
* NULL is returned. If the native service string is a UChar*
|
||||
* string, it is converted to char* with the invariant converter.
|
||||
* The result is terminated by (char)0. If the conversion fails
|
||||
* (because a character cannot be converted) then status is set to
|
||||
* U_INVARIANT_CONVERSION_ERROR and the return value is undefined
|
||||
* (but non-NULL).
|
||||
* @param en the iterator object
|
||||
* @param resultLength pointer to receive the length of the result
|
||||
* (not including the terminating \\0).
|
||||
* If the pointer is NULL it is ignored.
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service. Set to
|
||||
* U_INVARIANT_CONVERSION_ERROR if the underlying native string is
|
||||
* UChar* and conversion to char* with the invariant converter
|
||||
* fails. This error pertains only to the current string, so iteration
|
||||
* might be able to continue successfully.
|
||||
* @return a pointer to the string. The string will be
|
||||
* zero-terminated. The return pointer is owned by this iterator
|
||||
* and must not be deleted by the caller. The pointer is valid
|
||||
* until the next call to any uenum_... method, including
|
||||
* uenum_next() or uenum_unext(). When all strings have been
|
||||
* traversed, returns NULL.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uenum_next(UEnumeration* en,
|
||||
int32_t* resultLength,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Resets the iterator to the current list of service IDs. This
|
||||
* re-establishes sync with the service and rewinds the iterator
|
||||
* to start at the first element.
|
||||
* @param en the iterator object
|
||||
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
|
||||
* the iterator is out of sync with its service.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uenum_reset(UEnumeration* en, UErrorCode* status);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Given a StringEnumeration, wrap it in a UEnumeration. The
|
||||
* StringEnumeration is adopted; after this call, the caller must not
|
||||
* delete it (regardless of error status).
|
||||
* @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
|
||||
* @param ec the error code.
|
||||
* @return a UEnumeration wrapping the adopted StringEnumeration.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
|
||||
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
|
||||
* \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
|
||||
* @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
|
||||
* @param count length of the array
|
||||
* @param ec error code
|
||||
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
|
||||
* @see uenum_close
|
||||
* @stable ICU 50
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
|
||||
UErrorCode* ec);
|
||||
|
||||
/**
|
||||
* Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
|
||||
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
|
||||
* \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
|
||||
* @param strings array of const char* strings (each null terminated). All storage is owned by the caller.
|
||||
* @param count length of the array
|
||||
* @param ec error code
|
||||
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
|
||||
* @see uenum_close
|
||||
* @stable ICU 50
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
|
||||
UErrorCode* ec);
|
||||
|
||||
#endif
|
||||
@@ -1,709 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2011 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uiter.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jan18
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UITER_H__
|
||||
#define __UITER_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Unicode Character Iteration
|
||||
*
|
||||
* @see UCharIterator
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class CharacterIterator;
|
||||
class Replaceable;
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
struct UCharIterator;
|
||||
typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
|
||||
|
||||
/**
|
||||
* Origin constants for UCharIterator.getIndex() and UCharIterator.move().
|
||||
* @see UCharIteratorMove
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef enum UCharIteratorOrigin {
|
||||
UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
|
||||
} UCharIteratorOrigin;
|
||||
|
||||
/** Constants for UCharIterator. @stable ICU 2.6 */
|
||||
enum {
|
||||
/**
|
||||
* Constant value that may be returned by UCharIteratorMove
|
||||
* indicating that the final UTF-16 index is not known, but that the move succeeded.
|
||||
* This can occur when moving relative to limit or length, or
|
||||
* when moving relative to the current index after a setState()
|
||||
* when the current UTF-16 index is not known.
|
||||
*
|
||||
* It would be very inefficient to have to count from the beginning of the text
|
||||
* just to get the current/limit/length index after moving relative to it.
|
||||
* The actual index can be determined with getIndex(UITER_CURRENT)
|
||||
* which will count the UChars if necessary.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UITER_UNKNOWN_INDEX=-2
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Constant for UCharIterator getState() indicating an error or
|
||||
* an unknown state.
|
||||
* Returned by uiter_getState()/UCharIteratorGetState
|
||||
* when an error occurs.
|
||||
* Also, some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position. This will be clearly documented
|
||||
* for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define UITER_NO_STATE ((uint32_t)0xffffffff)
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.getIndex().
|
||||
*
|
||||
* Gets the current position, or the start or limit of the
|
||||
* iteration range.
|
||||
*
|
||||
* This function may perform slowly for UITER_CURRENT after setState() was called,
|
||||
* or for UITER_LENGTH, because an iterator implementation may have to count
|
||||
* UChars if the underlying storage is not UTF-16.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param origin get the 0, start, limit, length, or current index
|
||||
* @return the requested index, or U_SENTINEL in an error condition
|
||||
*
|
||||
* @see UCharIteratorOrigin
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.move().
|
||||
*
|
||||
* Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
|
||||
*
|
||||
* Moves the current position relative to the start or limit of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
* Out of bounds movement will be pinned to the start or limit.
|
||||
*
|
||||
* This function may perform slowly for moving relative to UITER_LENGTH
|
||||
* because an iterator implementation may have to count the rest of the
|
||||
* UChars if the native storage is not UTF-16.
|
||||
*
|
||||
* When moving relative to the limit or length, or
|
||||
* relative to the current position after setState() was called,
|
||||
* move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
|
||||
* determination of the actual UTF-16 index.
|
||||
* The actual index can be determined with getIndex(UITER_CURRENT)
|
||||
* which will count the UChars if necessary.
|
||||
* See UITER_UNKNOWN_INDEX for details.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param delta can be positive, zero, or negative
|
||||
* @param origin move relative to the 0, start, limit, length, or current index
|
||||
* @return the new index, or U_SENTINEL on an error condition,
|
||||
* or UITER_UNKNOWN_INDEX when the index is not known.
|
||||
*
|
||||
* @see UCharIteratorOrigin
|
||||
* @see UCharIterator
|
||||
* @see UITER_UNKNOWN_INDEX
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.hasNext().
|
||||
*
|
||||
* Check if current() and next() can still
|
||||
* return another code unit.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return boolean value for whether current() and next() can still return another code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UBool U_CALLCONV
|
||||
UCharIteratorHasNext(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.hasPrevious().
|
||||
*
|
||||
* Check if previous() can still return another code unit.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return boolean value for whether previous() can still return another code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UBool U_CALLCONV
|
||||
UCharIteratorHasPrevious(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.current().
|
||||
*
|
||||
* Return the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code unit
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV
|
||||
UCharIteratorCurrent(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.next().
|
||||
*
|
||||
* Return the code unit at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code unit (and post-increment the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV
|
||||
UCharIteratorNext(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.previous().
|
||||
*
|
||||
* Decrement the index and return the code unit from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the previous code unit (after pre-decrementing the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef UChar32 U_CALLCONV
|
||||
UCharIteratorPrevious(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.reservedFn().
|
||||
* Reserved for future use.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param something some integer argument
|
||||
* @return some integer
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UCharIteratorReserved(UCharIterator *iter, int32_t something);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.getState().
|
||||
*
|
||||
* Get the "state" of the iterator in the form of a single 32-bit word.
|
||||
* It is recommended that the state value be calculated to be as small as
|
||||
* is feasible. For strings with limited lengths, fewer than 32 bits may
|
||||
* be sufficient.
|
||||
*
|
||||
* This is used together with setState()/UCharIteratorSetState
|
||||
* to save and restore the iterator position more efficiently than with
|
||||
* getIndex()/move().
|
||||
*
|
||||
* The iterator state is defined as a uint32_t value because it is designed
|
||||
* for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
|
||||
* of the character iterator.
|
||||
*
|
||||
* With some UCharIterator implementations (e.g., UTF-8),
|
||||
* getting and setting the UTF-16 index with existing functions
|
||||
* (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
|
||||
* relatively slow because the iterator has to "walk" from a known index
|
||||
* to the requested one.
|
||||
* This takes more time the farther it needs to go.
|
||||
*
|
||||
* An opaque state value allows an iterator implementation to provide
|
||||
* an internal index (UTF-8: the source byte array index) for
|
||||
* fast, constant-time restoration.
|
||||
*
|
||||
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
|
||||
* the UTF-16 index may not be restored as well, but the iterator can deliver
|
||||
* the correct text contents and move relative to the current position
|
||||
* without performance degradation.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorSetState
|
||||
* @see UITER_NO_STATE
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
UCharIteratorGetState(const UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Function type declaration for UCharIterator.setState().
|
||||
*
|
||||
* Restore the "state" of the iterator using a state word from a getState() call.
|
||||
* The iterator object need not be the same one as for which getState() was called,
|
||||
* but it must be of the same type (set up using the same uiter_setXYZ function)
|
||||
* and it must iterate over the same string
|
||||
* (binary identical regardless of memory address).
|
||||
* For more about the state word see UCharIteratorGetState.
|
||||
*
|
||||
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
|
||||
* the UTF-16 index may not be restored as well, but the iterator can deliver
|
||||
* the correct text contents and move relative to the current position
|
||||
* without performance degradation.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param state the state word from a getState() call
|
||||
* on a same-type, same-string iterator
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorGetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
|
||||
|
||||
|
||||
/**
|
||||
* C API for code unit iteration.
|
||||
* This can be used as a C wrapper around
|
||||
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
|
||||
*
|
||||
* There are two roles for using UCharIterator:
|
||||
*
|
||||
* A "provider" sets the necessary function pointers and controls the "protected"
|
||||
* fields of the UCharIterator structure. A "provider" passes a UCharIterator
|
||||
* into C APIs that need a UCharIterator as an abstract, flexible string interface.
|
||||
*
|
||||
* Implementations of such C APIs are "callers" of UCharIterator functions;
|
||||
* they only use the "public" function pointers and never access the "protected"
|
||||
* fields directly.
|
||||
*
|
||||
* The current() and next() functions only check the current index against the
|
||||
* limit, and previous() only checks the current index against the start,
|
||||
* to see if the iterator already reached the end of the iteration range.
|
||||
*
|
||||
* The assumption - in all iterators - is that the index is moved via the API,
|
||||
* which means it won't go out of bounds, or the index is modified by
|
||||
* user code that knows enough about the iterator implementation to set valid
|
||||
* index values.
|
||||
*
|
||||
* UCharIterator functions return code unit values 0..0xffff,
|
||||
* or U_SENTINEL if the iteration bounds are reached.
|
||||
*
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
struct UCharIterator {
|
||||
/**
|
||||
* (protected) Pointer to string or wrapped object or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
const void *context;
|
||||
|
||||
/**
|
||||
* (protected) Length of string or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t length;
|
||||
|
||||
/**
|
||||
* (protected) Start index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t start;
|
||||
|
||||
/**
|
||||
* (protected) Current index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t index;
|
||||
|
||||
/**
|
||||
* (protected) Limit index or similar.
|
||||
* Not used by caller.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t limit;
|
||||
|
||||
/**
|
||||
* (protected) Used by UTF-8 iterators and possibly others.
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
int32_t reservedField;
|
||||
|
||||
/**
|
||||
* (public) Returns the current position or the
|
||||
* start or limit index of the iteration range.
|
||||
*
|
||||
* @see UCharIteratorGetIndex
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorGetIndex *getIndex;
|
||||
|
||||
/**
|
||||
* (public) Moves the current position relative to the start or limit of the
|
||||
* iteration range, or relative to the current position itself.
|
||||
* The movement is expressed in numbers of code units forward
|
||||
* or backward by specifying a positive or negative delta.
|
||||
*
|
||||
* @see UCharIteratorMove
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorMove *move;
|
||||
|
||||
/**
|
||||
* (public) Check if current() and next() can still
|
||||
* return another code unit.
|
||||
*
|
||||
* @see UCharIteratorHasNext
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorHasNext *hasNext;
|
||||
|
||||
/**
|
||||
* (public) Check if previous() can still return another code unit.
|
||||
*
|
||||
* @see UCharIteratorHasPrevious
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorHasPrevious *hasPrevious;
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @see UCharIteratorCurrent
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorCurrent *current;
|
||||
|
||||
/**
|
||||
* (public) Return the code unit at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @see UCharIteratorNext
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorNext *next;
|
||||
|
||||
/**
|
||||
* (public) Decrement the index and return the code unit from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @see UCharIteratorPrevious
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorPrevious *previous;
|
||||
|
||||
/**
|
||||
* (public) Reserved for future use. Currently NULL.
|
||||
*
|
||||
* @see UCharIteratorReserved
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
UCharIteratorReserved *reservedFn;
|
||||
|
||||
/**
|
||||
* (public) Return the state of the iterator, to be restored later with setState().
|
||||
* This function pointer is NULL if the iterator does not implement it.
|
||||
*
|
||||
* @see UCharIteratorGetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UCharIteratorGetState *getState;
|
||||
|
||||
/**
|
||||
* (public) Restore the iterator state from the state word from a call
|
||||
* to getState().
|
||||
* This function pointer is NULL if the iterator does not implement it.
|
||||
*
|
||||
* @see UCharIteratorSetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UCharIteratorSetState *setState;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the code point
|
||||
* at the current index.
|
||||
*
|
||||
* Return the code point that includes the code unit at the current position,
|
||||
* or U_SENTINEL if there is none (index is at the limit).
|
||||
* If the current code unit is a lead or trail surrogate,
|
||||
* then the following or preceding surrogate is used to form
|
||||
* the code point value.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code point
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_GET
|
||||
* @see UnicodeString::char32At()
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_current32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the next code point.
|
||||
*
|
||||
* Return the code point at the current index and increment
|
||||
* the index (post-increment, like s[i++]),
|
||||
* or return U_SENTINEL if there is none (index is at the limit).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the current code point (and post-increment the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_NEXT
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_next32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Helper function for UCharIterator to get the previous code point.
|
||||
*
|
||||
* Decrement the index and return the code point from there
|
||||
* (pre-decrement, like s[--i]),
|
||||
* or return U_SENTINEL if there is none (index is at the start).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the previous code point (after pre-decrementing the current index)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see U16_PREV
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
uiter_previous32(UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Get the "state" of the iterator in the form of a single 32-bit word.
|
||||
* This is a convenience function that calls iter->getState(iter)
|
||||
* if iter->getState is not NULL;
|
||||
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
|
||||
*
|
||||
* Some UCharIterator implementations may not be able to return
|
||||
* a valid state for each position, in which case they return UITER_NO_STATE instead.
|
||||
* This will be clearly documented for each such iterator (none of the public ones here).
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @return the state word
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorGetState
|
||||
* @see UITER_NO_STATE
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
uiter_getState(const UCharIterator *iter);
|
||||
|
||||
/**
|
||||
* Restore the "state" of the iterator using a state word from a getState() call.
|
||||
* This is a convenience function that calls iter->setState(iter, state, pErrorCode)
|
||||
* if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
|
||||
*
|
||||
* @param iter the UCharIterator structure ("this pointer")
|
||||
* @param state the state word from a getState() call
|
||||
* on a same-type, same-string iterator
|
||||
* @param pErrorCode Must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see UCharIteratorSetState
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a string.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the string s
|
||||
* with iteration boundaries start=index=0 and length=limit=string length.
|
||||
* The "provider" may set the start, index, and limit values at any time
|
||||
* within the range 0..length.
|
||||
* The length field will be ignored.
|
||||
*
|
||||
* The string pointer s is set into UCharIterator.context without copying
|
||||
* or reallocating the string contents.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s String to iterate over
|
||||
* @param length Length of s, or -1 if NUL-terminated
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a UTF-16BE string
|
||||
* (byte vector with a big-endian pair of bytes per UChar).
|
||||
*
|
||||
* Everything works just like with a normal UChar iterator (uiter_setString),
|
||||
* except that UChars are assembled from byte pairs,
|
||||
* and that the length argument here indicates an even number of bytes.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s UTF-16BE string to iterate over
|
||||
* @param length Length of s as an even number of bytes, or -1 if NUL-terminated
|
||||
* (NUL means pair of 0 bytes at even index from s)
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @see uiter_setString
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a UTF-8 string.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the UTF-8 string s
|
||||
* with UTF-8 iteration boundaries 0 and length.
|
||||
* The implementation counts the UTF-16 index on the fly and
|
||||
* lazily evaluates the UTF-16 length of the text.
|
||||
*
|
||||
* The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
|
||||
* When the reservedField is not 0, then it contains a supplementary code point
|
||||
* and the UTF-16 index is between the two corresponding surrogates.
|
||||
* At that point, the UTF-8 index is behind that code point.
|
||||
*
|
||||
* The UTF-8 string pointer s is set into UCharIterator.context without copying
|
||||
* or reallocating the string contents.
|
||||
*
|
||||
* getState() returns a state value consisting of
|
||||
* - the current UTF-8 source byte index (bits 31..1)
|
||||
* - a flag (bit 0) that indicates whether the UChar position is in the middle
|
||||
* of a surrogate pair
|
||||
* (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
|
||||
*
|
||||
* getState() cannot also encode the UTF-16 index in the state value.
|
||||
* move(relative to limit or length), or
|
||||
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param s UTF-8 string to iterate over
|
||||
* @param length Length of s in bytes, or -1 if NUL-terminated
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to wrap around a C++ CharacterIterator.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration using the
|
||||
* CharacterIterator charIter.
|
||||
*
|
||||
* The CharacterIterator pointer charIter is set into UCharIterator.context
|
||||
* without copying or cloning the CharacterIterator object.
|
||||
* The other "protected" UCharIterator fields are set to 0 and will be ignored.
|
||||
* The iteration index and boundaries are controlled by the CharacterIterator.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param charIter CharacterIterator to wrap
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
|
||||
|
||||
/**
|
||||
* Set up a UCharIterator to iterate over a C++ Replaceable.
|
||||
*
|
||||
* Sets the UCharIterator function pointers for iteration over the
|
||||
* Replaceable rep with iteration boundaries start=index=0 and
|
||||
* length=limit=rep->length().
|
||||
* The "provider" may set the start, index, and limit values at any time
|
||||
* within the range 0..length=rep->length().
|
||||
* The length field will be ignored.
|
||||
*
|
||||
* The Replaceable pointer rep is set into UCharIterator.context without copying
|
||||
* or cloning/reallocating the Replaceable object.
|
||||
*
|
||||
* getState() simply returns the current index.
|
||||
* move() will always return the final index.
|
||||
*
|
||||
* @param iter UCharIterator structure to be set for iteration
|
||||
* @param rep Replaceable to iterate over
|
||||
*
|
||||
* @see UCharIterator
|
||||
* @stable ICU 2.1
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
|
||||
|
||||
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,491 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: umachine.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This file defines basic types and constants for ICU to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*/
|
||||
|
||||
#ifndef __UMACHINE_H__
|
||||
#define __UMACHINE_H__
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Basic types and constants for UTF
|
||||
*
|
||||
* <h2> Basic types and constants for UTF </h2>
|
||||
* This file defines basic types and constants for utf.h to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*
|
||||
*/
|
||||
/*==========================================================================*/
|
||||
/* Include platform-dependent definitions */
|
||||
/* which are contained in the platform-specific file platform.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
|
||||
|
||||
/*
|
||||
* ANSI C headers:
|
||||
* stddef.h defines wchar_t
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*==========================================================================*/
|
||||
/* For C wrappers, we use the symbol U_CAPI. */
|
||||
/* This works properly if the includer is C or C++. */
|
||||
/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_CFUNC
|
||||
* This is used in a declaration of a library private ICU C function.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_BEGIN
|
||||
* This is used to begin a declaration of a library private ICU C API.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_END
|
||||
* This is used to end a declaration of a library private ICU C API.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
# define U_CFUNC extern "C"
|
||||
# define U_CDECL_BEGIN extern "C" {
|
||||
# define U_CDECL_END }
|
||||
#else
|
||||
# define U_CFUNC extern
|
||||
# define U_CDECL_BEGIN
|
||||
# define U_CDECL_END
|
||||
#endif
|
||||
|
||||
#ifndef U_ATTRIBUTE_DEPRECATED
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for GCC specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#if U_GCC_MAJOR_MINOR >= 302
|
||||
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for Visual C++ specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
|
||||
#else
|
||||
# define U_ATTRIBUTE_DEPRECATED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
|
||||
#define U_CAPI U_CFUNC U_EXPORT
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
|
||||
#define U_STABLE U_CAPI
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
|
||||
#define U_DRAFT U_CAPI
|
||||
/** This is used to declare a function as a deprecated public ICU C API */
|
||||
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
|
||||
#define U_OBSOLETE U_CAPI
|
||||
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
|
||||
#define U_INTERNAL U_CAPI
|
||||
|
||||
/**
|
||||
* \def U_OVERRIDE
|
||||
* Defined to the C++11 "override" keyword if available.
|
||||
* Denotes a class or member which is an override of the base class.
|
||||
* May result in an error if it is applied to something that is not an override.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_OVERRIDE
|
||||
#define U_OVERRIDE override
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_FINAL
|
||||
* Defined to the C++11 "final" keyword if available.
|
||||
* Denotes a class or member which may not be overridden in subclasses.
|
||||
* May result in an error if subclasses attempt to override.
|
||||
* @internal
|
||||
*/
|
||||
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
|
||||
#define U_FINAL final
|
||||
#endif
|
||||
|
||||
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
|
||||
// series of statements wrapped in { } blocks and the caller could choose to
|
||||
// either treat them as if they were actual functions and end the invocation
|
||||
// with a trailing ; creating an empty statement after the block or else omit
|
||||
// this trailing ; using the knowledge that the macro would expand to { }.
|
||||
//
|
||||
// But doing so doesn't work well with macros that look like functions and
|
||||
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
|
||||
// switches to the standard solution of wrapping such macros in do { } while.
|
||||
//
|
||||
// This will however break existing code that depends on being able to invoke
|
||||
// these macros without a trailing ; so to be able to remain compatible with
|
||||
// such code the wrapper is itself defined as macros so that it's possible to
|
||||
// build ICU 65 and later with the old macro behaviour, like this:
|
||||
//
|
||||
// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
|
||||
// runConfigureICU ...
|
||||
//
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_BEGIN
|
||||
* Defined as the "do" keyword by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_BEGIN
|
||||
#define UPRV_BLOCK_MACRO_BEGIN do
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_END
|
||||
* Defined as "while (false)" by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_END
|
||||
#define UPRV_BLOCK_MACRO_END while (false)
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* limits for int32_t etc., like in POSIX inttypes.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#ifndef INT8_MIN
|
||||
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MIN ((int8_t)(-128))
|
||||
#endif
|
||||
#ifndef INT16_MIN
|
||||
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MIN ((int16_t)(-32767-1))
|
||||
#endif
|
||||
#ifndef INT32_MIN
|
||||
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MIN ((int32_t)(-2147483647-1))
|
||||
#endif
|
||||
|
||||
#ifndef INT8_MAX
|
||||
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MAX ((int8_t)(127))
|
||||
#endif
|
||||
#ifndef INT16_MAX
|
||||
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MAX ((int16_t)(32767))
|
||||
#endif
|
||||
#ifndef INT32_MAX
|
||||
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MAX ((int32_t)(2147483647))
|
||||
#endif
|
||||
|
||||
#ifndef UINT8_MAX
|
||||
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT8_MAX ((uint8_t)(255U))
|
||||
#endif
|
||||
#ifndef UINT16_MAX
|
||||
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT16_MAX ((uint16_t)(65535U))
|
||||
#endif
|
||||
#ifndef UINT32_MAX
|
||||
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT32_MAX ((uint32_t)(4294967295U))
|
||||
#endif
|
||||
|
||||
#if defined(U_INT64_T_UNAVAILABLE)
|
||||
# error int64_t is required for decimal format and rule-based number format.
|
||||
#else
|
||||
# ifndef INT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify a signed 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define INT64_C(c) c ## LL
|
||||
# endif
|
||||
# ifndef UINT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify an unsigned 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define UINT64_C(c) c ## ULL
|
||||
# endif
|
||||
# ifndef U_INT64_MIN
|
||||
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
|
||||
# endif
|
||||
# ifndef U_INT64_MAX
|
||||
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
|
||||
# endif
|
||||
# ifndef U_UINT64_MAX
|
||||
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
|
||||
# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Boolean data type */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* The ICU boolean type, a signed-byte integer.
|
||||
* ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
|
||||
* Also provides a fixed type definition, as opposed to
|
||||
* type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef int8_t UBool;
|
||||
|
||||
/**
|
||||
* \def U_DEFINE_FALSE_AND_TRUE
|
||||
* Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
|
||||
* These obsolete macros sometimes break compilation of other code that
|
||||
* defines enum constants or similar with these names.
|
||||
* C++ has long defined bool/false/true.
|
||||
* C99 also added definitions for these, although as macros; see stdbool.h.
|
||||
*
|
||||
* You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
|
||||
*
|
||||
* @internal ICU 68
|
||||
*/
|
||||
#ifdef U_DEFINE_FALSE_AND_TRUE
|
||||
// Use the predefined value.
|
||||
#elif defined(U_COMBINED_IMPLEMENTATION) || \
|
||||
defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
|
||||
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
|
||||
defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
// Inside ICU: Keep FALSE & TRUE available.
|
||||
# define U_DEFINE_FALSE_AND_TRUE 1
|
||||
#else
|
||||
// Outside ICU: Avoid collision with non-macro definitions of FALSE & TRUE.
|
||||
# define U_DEFINE_FALSE_AND_TRUE 0
|
||||
#endif
|
||||
|
||||
#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
|
||||
#ifndef TRUE
|
||||
/**
|
||||
* The TRUE value of a UBool.
|
||||
*
|
||||
* @deprecated ICU 68 Use standard "true" instead.
|
||||
*/
|
||||
# define TRUE 1
|
||||
#endif
|
||||
#ifndef FALSE
|
||||
/**
|
||||
* The FALSE value of a UBool.
|
||||
*
|
||||
* @deprecated ICU 68 Use standard "false" instead.
|
||||
*/
|
||||
# define FALSE 0
|
||||
#endif
|
||||
#endif // U_DEFINE_FALSE_AND_TRUE
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Unicode data types */
|
||||
/*==========================================================================*/
|
||||
|
||||
/* wchar_t-related definitions -------------------------------------------- */
|
||||
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF16
|
||||
* Defined if wchar_t uses UTF-16.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF32
|
||||
* Defined if wchar_t uses UTF-32.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
|
||||
# ifdef __STDC_ISO_10646__
|
||||
# if (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# elif (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
# elif defined __UCS2__
|
||||
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
|
||||
# if (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# elif U_PLATFORM_HAS_WIN32_API
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* UChar and UChar32 definitions -------------------------------------------- */
|
||||
|
||||
/** Number of bytes in a UChar. @stable ICU 2.0 */
|
||||
#define U_SIZEOF_UCHAR 2
|
||||
|
||||
/**
|
||||
* \def U_CHAR16_IS_TYPEDEF
|
||||
* If 1, then char16_t is a typedef and not a real type (yet)
|
||||
* @internal
|
||||
*/
|
||||
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
|
||||
// for AIX, uchar.h needs to be included
|
||||
# include <uchar.h>
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
|
||||
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#else
|
||||
# define U_CHAR16_IS_TYPEDEF 0
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* \var UChar
|
||||
*
|
||||
* The base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
|
||||
*
|
||||
* UChar is configurable by defining the macro UCHAR_TYPE
|
||||
* on the preprocessor or compiler command line:
|
||||
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
|
||||
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
|
||||
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
|
||||
*
|
||||
* The default is UChar=char16_t.
|
||||
*
|
||||
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
|
||||
*
|
||||
* In C, char16_t is a simple typedef of uint_least16_t.
|
||||
* ICU requires uint_least16_t=uint16_t for data memory mapping.
|
||||
* On macOS, char16_t is not available because the uchar.h standard header is missing.
|
||||
*
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
|
||||
#if 1
|
||||
// #if 1 is normal. UChar defaults to char16_t in C++.
|
||||
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
|
||||
// The intltest Makefile #defines UCHAR_TYPE=char16_t,
|
||||
// so we only #define it to uint16_t if it is undefined so far.
|
||||
#elif !defined(UCHAR_TYPE)
|
||||
# define UCHAR_TYPE uint16_t
|
||||
#endif
|
||||
|
||||
#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
|
||||
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
|
||||
// Inside the ICU library code, never configurable.
|
||||
typedef char16_t UChar;
|
||||
#elif defined(UCHAR_TYPE)
|
||||
typedef UCHAR_TYPE UChar;
|
||||
#elif (U_CPLUSPLUS_VERSION >= 11)
|
||||
typedef char16_t UChar;
|
||||
#else
|
||||
typedef uint16_t UChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \var OldUChar
|
||||
* Default ICU 58 definition of UChar.
|
||||
* A base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
*
|
||||
* Define OldUChar to be wchar_t if that is 16 bits wide.
|
||||
* If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if the compiler provides it, or else uint16_t.
|
||||
*
|
||||
* This makes the definition of OldUChar platform-dependent
|
||||
* but allows direct string type compatibility with platforms with
|
||||
* 16-bit wchar_t types.
|
||||
*
|
||||
* This is how UChar was defined in ICU 58, for transition convenience.
|
||||
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
|
||||
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
|
||||
*
|
||||
* @stable ICU 59
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
typedef wchar_t OldUChar;
|
||||
#elif defined(__CHAR16_TYPE__)
|
||||
typedef __CHAR16_TYPE__ OldUChar;
|
||||
#else
|
||||
typedef uint16_t OldUChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Define UChar32 as a type for single Unicode code points.
|
||||
* UChar32 is a signed 32-bit integer (same as int32_t).
|
||||
*
|
||||
* The Unicode code point range is 0..0x10ffff.
|
||||
* All other values (negative or >=0x110000) are illegal as Unicode code points.
|
||||
* They may be used as sentinel values to indicate "done", "error"
|
||||
* or similar non-code point conditions.
|
||||
*
|
||||
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
|
||||
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
|
||||
* or else to be uint32_t.
|
||||
* That is, the definition of UChar32 was platform-dependent.
|
||||
*
|
||||
* @see U_SENTINEL
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
typedef int32_t UChar32;
|
||||
|
||||
/**
|
||||
* This value is intended for sentinel values for APIs that
|
||||
* (take or) return single code points (UChar32).
|
||||
* It is outside of the Unicode code point range 0..0x10ffff.
|
||||
*
|
||||
* For example, a "done" or "error" value in a new API
|
||||
* could be indicated with U_SENTINEL.
|
||||
*
|
||||
* ICU APIs designed before ICU 2.4 usually define service-specific "done"
|
||||
* values, mostly 0xffff.
|
||||
* Those may need to be distinguished from
|
||||
* actual U+ffff text contents by calling functions like
|
||||
* CharacterIterator::hasNext() or UnicodeString::length().
|
||||
*
|
||||
* @return -1
|
||||
* @see UChar32
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_SENTINEL (-1)
|
||||
|
||||
#include "unicode/urename.h"
|
||||
|
||||
#endif
|
||||
@@ -1,62 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: umisc.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999oct15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef UMISC_H
|
||||
#define UMISC_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API:misc definitions
|
||||
*
|
||||
* This file contains miscellaneous definitions for the C APIs.
|
||||
*/
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/** A struct representing a range of text containing a specific field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef struct UFieldPosition {
|
||||
/**
|
||||
* The field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t field;
|
||||
/**
|
||||
* The start of the text range containing field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t beginIndex;
|
||||
/**
|
||||
* The limit of the text range containing field
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
int32_t endIndex;
|
||||
} UFieldPosition;
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
/**
|
||||
* Opaque type returned by registerInstance, registerFactory and unregister for service registration.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
typedef const void* URegistryKey;
|
||||
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,324 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: uobject.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jun26
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UOBJECT_H__
|
||||
#define __UOBJECT_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/platform.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Common ICU base class UObject.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NO_THROW
|
||||
* Since ICU 64, use U_NOEXCEPT instead.
|
||||
*
|
||||
* Previously, define this to define the throw() specification so
|
||||
* certain functions do not throw any exceptions
|
||||
*
|
||||
* UMemory operator new methods should have the throw() specification
|
||||
* appended to them, so that the compiler adds the additional NULL check
|
||||
* before calling constructors. Without, if <code>operator new</code> returns NULL the
|
||||
* constructor is still called, and if the constructor references member
|
||||
* data, (which it typically does), the result is a segmentation violation.
|
||||
*
|
||||
* @stable ICU 4.2. Since ICU 64, Use U_NOEXCEPT instead. See ICU-20422.
|
||||
*/
|
||||
#ifndef U_NO_THROW
|
||||
#define U_NO_THROW U_NOEXCEPT
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* UClassID-based RTTI */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* UClassID is used to identify classes without using the compiler's RTTI.
|
||||
* This was used before C++ compilers consistently supported RTTI.
|
||||
* ICU 4.6 requires compiler RTTI to be turned on.
|
||||
*
|
||||
* Each class hierarchy which needs
|
||||
* to implement polymorphic clone() or operator==() defines two methods,
|
||||
* described in detail below. UClassID values can be compared using
|
||||
* operator==(). Nothing else should be done with them.
|
||||
*
|
||||
* \par
|
||||
* In class hierarchies that implement "poor man's RTTI",
|
||||
* each concrete subclass implements getDynamicClassID() in the same way:
|
||||
*
|
||||
* \code
|
||||
* class Derived {
|
||||
* public:
|
||||
* virtual UClassID getDynamicClassID() const
|
||||
* { return Derived::getStaticClassID(); }
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* Each concrete class implements getStaticClassID() as well, which allows
|
||||
* clients to test for a specific type.
|
||||
*
|
||||
* \code
|
||||
* class Derived {
|
||||
* public:
|
||||
* static UClassID U_EXPORT2 getStaticClassID();
|
||||
* private:
|
||||
* static char fgClassID;
|
||||
* }
|
||||
*
|
||||
* // In Derived.cpp:
|
||||
* UClassID Derived::getStaticClassID()
|
||||
* { return (UClassID)&Derived::fgClassID; }
|
||||
* char Derived::fgClassID = 0; // Value is irrelevant
|
||||
* \endcode
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef void* UClassID;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* UMemory is the common ICU base class.
|
||||
* All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
|
||||
*
|
||||
* This is primarily to make it possible and simple to override the
|
||||
* C++ memory management by adding new/delete operators to this base class.
|
||||
*
|
||||
* To override ALL ICU memory management, including that from plain C code,
|
||||
* replace the allocation functions declared in cmemory.h
|
||||
*
|
||||
* UMemory does not contain any virtual functions.
|
||||
* Common "boilerplate" functions are defined in UObject.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
class U_COMMON_API UMemory {
|
||||
public:
|
||||
|
||||
/* test versions for debugging shaper heap memory problems */
|
||||
#ifdef SHAPER_MEMORY_DEBUG
|
||||
static void * NewArray(int size, int count);
|
||||
static void * GrowArray(void * array, int newSize );
|
||||
static void FreeArray(void * array );
|
||||
#endif
|
||||
|
||||
#if U_OVERRIDE_CXX_ALLOCATION
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* simple, non-class types are allocated using the macros in common/cmemory.h
|
||||
* (uprv_malloc(), uprv_free(), uprv_realloc());
|
||||
* they or something else could be used here to implement C++ new/delete
|
||||
* for ICU4C C++ classes
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void * U_EXPORT2 operator new(size_t size) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* See new().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void * U_EXPORT2 operator new[](size_t size) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* simple, non-class types are allocated using the macros in common/cmemory.h
|
||||
* (uprv_malloc(), uprv_free(), uprv_realloc());
|
||||
* they or something else could be used here to implement C++ new/delete
|
||||
* for ICU4C C++ classes
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void U_EXPORT2 operator delete(void *p) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management.
|
||||
* See delete().
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
static void U_EXPORT2 operator delete[](void *p) U_NOEXCEPT;
|
||||
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
/**
|
||||
* Override for ICU4C C++ memory management for STL.
|
||||
* See new().
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NOEXCEPT { return ptr; }
|
||||
|
||||
/**
|
||||
* Override for ICU4C C++ memory management for STL.
|
||||
* See delete().
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
static inline void U_EXPORT2 operator delete(void *, void *) U_NOEXCEPT {}
|
||||
#endif /* U_HAVE_PLACEMENT_NEW */
|
||||
#if U_HAVE_DEBUG_LOCATION_NEW
|
||||
/**
|
||||
* This method overrides the MFC debug version of the operator new
|
||||
*
|
||||
* @param size The requested memory size
|
||||
* @param file The file where the allocation was requested
|
||||
* @param line The line where the allocation was requested
|
||||
*/
|
||||
static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NOEXCEPT;
|
||||
/**
|
||||
* This method provides a matching delete for the MFC debug new
|
||||
*
|
||||
* @param p The pointer to the allocated memory
|
||||
* @param file The file where the allocation was requested
|
||||
* @param line The line where the allocation was requested
|
||||
*/
|
||||
static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NOEXCEPT;
|
||||
#endif /* U_HAVE_DEBUG_LOCATION_NEW */
|
||||
#endif /* U_OVERRIDE_CXX_ALLOCATION */
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
UMemory &UMemory::operator=(const UMemory &);
|
||||
*/
|
||||
};
|
||||
|
||||
/**
|
||||
* UObject is the common ICU "boilerplate" class.
|
||||
* UObject inherits UMemory (starting with ICU 2.4),
|
||||
* and all other public ICU C++ classes
|
||||
* are derived from UObject (starting with ICU 2.2).
|
||||
*
|
||||
* UObject contains common virtual functions, in particular a virtual destructor.
|
||||
*
|
||||
* The clone() function is not available in UObject because it is not
|
||||
* implemented by all ICU classes.
|
||||
* Many ICU services provide a clone() function for their class trees,
|
||||
* defined on the service's C++ base class
|
||||
* (which itself is a subclass of UObject).
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
class U_COMMON_API UObject : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Destructor.
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual ~UObject();
|
||||
|
||||
/**
|
||||
* ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
|
||||
* The base class implementation returns a dummy value.
|
||||
*
|
||||
* Use compiler RTTI rather than ICU's "poor man's RTTI".
|
||||
* Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
|
||||
*
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
protected:
|
||||
// the following functions are protected to prevent instantiation and
|
||||
// direct use of UObject itself
|
||||
|
||||
// default constructor
|
||||
// inline UObject() {}
|
||||
|
||||
// copy constructor
|
||||
// inline UObject(const UObject &other) {}
|
||||
|
||||
#if 0
|
||||
// TODO Sometime in the future. Implement operator==().
|
||||
// (This comment inserted in 2.2)
|
||||
// some or all of the following "boilerplate" functions may be made public
|
||||
// in a future ICU4C release when all subclasses implement them
|
||||
|
||||
// assignment operator
|
||||
// (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
|
||||
// commented out because the implementation is the same as a compiler's default
|
||||
// UObject &operator=(const UObject &other) { return *this; }
|
||||
|
||||
// comparison operators
|
||||
virtual inline UBool operator==(const UObject &other) const { return this==&other; }
|
||||
inline UBool operator!=(const UObject &other) const { return !operator==(other); }
|
||||
|
||||
// clone() commented out from the base class:
|
||||
// some compilers do not support co-variant return types
|
||||
// (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
|
||||
// see also UObject class documentation.
|
||||
// virtual UObject *clone() const;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Assignment operator not declared. The compiler will provide one
|
||||
* which does nothing since this class does not contain any data members.
|
||||
* API/code coverage may show the assignment operator as present and
|
||||
* untested - ignore.
|
||||
* Subclasses need this assignment operator if they use compiler-provided
|
||||
* assignment operators of their own. An alternative to not declaring one
|
||||
* here would be to declare and empty-implement a protected or public one.
|
||||
UObject &UObject::operator=(const UObject &);
|
||||
*/
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* This is a simple macro to add ICU RTTI to an ICU object implementation.
|
||||
* This does not go into the header. This should only be used in *.cpp files.
|
||||
*
|
||||
* @param myClass The name of the class that needs RTTI defined.
|
||||
* @internal
|
||||
*/
|
||||
#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
|
||||
UClassID U_EXPORT2 myClass::getStaticClassID() { \
|
||||
static char classID = 0; \
|
||||
return (UClassID)&classID; \
|
||||
} \
|
||||
UClassID myClass::getDynamicClassID() const \
|
||||
{ return myClass::getStaticClassID(); }
|
||||
|
||||
|
||||
/**
|
||||
* This macro adds ICU RTTI to an ICU abstract class implementation.
|
||||
* This macro should be invoked in *.cpp files. The corresponding
|
||||
* header should declare getStaticClassID.
|
||||
*
|
||||
* @param myClass The name of the class that needs RTTI defined.
|
||||
* @internal
|
||||
*/
|
||||
#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
|
||||
UClassID U_EXPORT2 myClass::getStaticClassID() { \
|
||||
static char classID = 0; \
|
||||
return (UClassID)&classID; \
|
||||
}
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,734 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf16.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep09
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 16-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF16_H__
|
||||
#define __UTF16_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Does this code unit alone encode a code point (BMP, not a surrogate)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
|
||||
|
||||
/**
|
||||
* Is this code unit a lead surrogate (U+d800..U+dbff)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
|
||||
|
||||
/**
|
||||
* Is this code unit a trail surrogate (U+dc00..U+dfff)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
|
||||
|
||||
/**
|
||||
* Is this code unit a surrogate (U+d800..U+dfff)?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
|
||||
|
||||
/**
|
||||
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
|
||||
* is it a lead surrogate?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
|
||||
|
||||
/**
|
||||
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
|
||||
* is it a trail surrogate?
|
||||
* @param c 16-bit code unit
|
||||
* @return true or false
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
|
||||
|
||||
/**
|
||||
* Helper constant for U16_GET_SUPPLEMENTARY.
|
||||
* @internal
|
||||
*/
|
||||
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
|
||||
|
||||
/**
|
||||
* Get a supplementary code point value (U+10000..U+10ffff)
|
||||
* from its lead and trail surrogates.
|
||||
* The result is undefined if the input values are not
|
||||
* lead and trail surrogates.
|
||||
*
|
||||
* @param lead lead surrogate (U+d800..U+dbff)
|
||||
* @param trail trail surrogate (U+dc00..U+dfff)
|
||||
* @return supplementary code point (U+10000..U+10ffff)
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_GET_SUPPLEMENTARY(lead, trail) \
|
||||
(((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
|
||||
|
||||
|
||||
/**
|
||||
* Get the lead surrogate (0xd800..0xdbff) for a
|
||||
* supplementary code point (0x10000..0x10ffff).
|
||||
* @param supplementary 32-bit code point (U+10000..U+10ffff)
|
||||
* @return lead surrogate (U+d800..U+dbff) for supplementary
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
|
||||
|
||||
/**
|
||||
* Get the trail surrogate (0xdc00..0xdfff) for a
|
||||
* supplementary code point (0x10000..0x10ffff).
|
||||
* @param supplementary 32-bit code point (U+10000..U+10ffff)
|
||||
* @return trail surrogate (U+dc00..U+dfff) for supplementary
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
|
||||
|
||||
/**
|
||||
* How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
|
||||
* The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
|
||||
* @param c 32-bit code point
|
||||
* @return 1 or 2
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
|
||||
|
||||
/**
|
||||
* The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
|
||||
* @return 2
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_MAX_LENGTH 2
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* The offset may point to either the lead or trail surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the adjacent matching surrogate as well.
|
||||
* The result is undefined if the offset points to a single, unpaired surrogate.
|
||||
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_GET
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[i]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
if(U16_IS_SURROGATE_LEAD(c)) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
|
||||
} else { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The offset may point to either the lead or trail surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the adjacent matching surrogate as well.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* If the offset points to a single, unpaired surrogate, then
|
||||
* c is set to that unpaired surrogate.
|
||||
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<=i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_GET_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[i]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if(U16_IS_SURROGATE_LEAD(c)) { \
|
||||
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} \
|
||||
} else { \
|
||||
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The offset may point to either the lead or trail surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the adjacent matching surrogate as well.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* If the offset points to a single, unpaired surrogate, then
|
||||
* c is set to U+FFFD.
|
||||
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<=i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_GET_UNSAFE
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[i]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if(U16_IS_SURROGATE_LEAD(c)) { \
|
||||
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} else { \
|
||||
(c)=0xfffd; \
|
||||
} \
|
||||
} else { \
|
||||
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
|
||||
} else { \
|
||||
(c)=0xfffd; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* The offset may point to the lead surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the following trail surrogate as well.
|
||||
* If the offset points to a trail surrogate, then that itself
|
||||
* will be returned as the code point.
|
||||
* The result is undefined if the offset points to a single, unpaired lead surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if(U16_IS_LEAD(c)) { \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* The offset may point to the lead surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the following trail surrogate as well.
|
||||
* If the offset points to a trail surrogate or
|
||||
* to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if(U16_IS_LEAD(c)) { \
|
||||
uint16_t __c2; \
|
||||
if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
|
||||
++(i); \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* The offset may point to the lead surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the following trail surrogate as well.
|
||||
* If the offset points to a trail surrogate or
|
||||
* to a single, unpaired lead surrogate, then c is set to U+FFFD.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT_UNSAFE
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(s)[(i)++]; \
|
||||
if(U16_IS_SURROGATE(c)) { \
|
||||
uint16_t __c2; \
|
||||
if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
|
||||
++(i); \
|
||||
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
} else { \
|
||||
(c)=0xfffd; \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 or 2 code units.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
|
||||
* Otherwise, the result is undefined.
|
||||
*
|
||||
* @param s const UChar * string buffer
|
||||
* @param i string offset
|
||||
* @param c code point to append
|
||||
* @see U16_APPEND
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if((uint32_t)(c)<=0xffff) { \
|
||||
(s)[(i)++]=(uint16_t)(c); \
|
||||
} else { \
|
||||
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
|
||||
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 or 2 code units.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Safe" macro, checks for a valid code point.
|
||||
* If a surrogate pair is written, checks for sufficient space in the string.
|
||||
* If the code point is not valid or a trail surrogate does not fit,
|
||||
* then isError is set to true.
|
||||
*
|
||||
* @param s const UChar * string buffer
|
||||
* @param i string offset, must be i<capacity
|
||||
* @param capacity size of the string buffer
|
||||
* @param c code point to append
|
||||
* @param isError output UBool set to true if an error occurs, otherwise not modified
|
||||
* @see U16_APPEND_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if((uint32_t)(c)<=0xffff) { \
|
||||
(s)[(i)++]=(uint16_t)(c); \
|
||||
} else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
|
||||
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
|
||||
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
|
||||
} else /* c>0x10ffff or not enough space */ { \
|
||||
(isError)=true; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_FWD_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_LEAD((s)[(i)++])) { \
|
||||
++(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @see U16_FWD_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
|
||||
++(i); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U16_FWD_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); \
|
||||
while(__N>0) { \
|
||||
U16_FWD_1_UNSAFE(s, i); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset forward by n code points,
 * i.e., from one code point boundary to the n-th next one.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * iteration stops early once the offset reaches the length or, for a negative
 * length, once the code unit at the offset is NUL.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    for(; __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset back to the start of the code point it is in.
 * When the code unit at the offset is a trail surrogate, the offset is
 * decremented by one; in every other case it is left alone.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { --(i); } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset back to the start of the code point it is in.
 * The offset is decremented only when it points at a trail surrogate, lies
 * after the start offset, and the preceding unit is a lead surrogate.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        /* never look in front of the start offset */ \
        if((i)>(start)) { \
            if(U16_IS_LEAD((s)[(i)-1])) { \
                --(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
 * Step the string offset back to the previous code point boundary
 * and fetch the code point that was passed over.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * When the unit just before the offset is a trail surrogate, the unit before
 * that one is read as its lead surrogate without any further check.
 * When the unit just before the offset is a lead surrogate, that surrogate
 * itself is returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset back to the previous code point boundary
 * and fetch the code point that was passed over.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * When the unit just before the offset is a trail surrogate and it is preceded
 * (not before start) by a lead surrogate, both units are consumed and combined
 * into a supplementary code point.
 * A lead surrogate, or a trail surrogate without such a lead, is returned in c
 * as that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        if((i)>(start)) { \
            /* peek at the unit in front without moving yet */ \
            uint16_t __c2=(s)[(i)-1]; \
            if(U16_IS_LEAD(__c2)) { \
                --(i); \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset back to the previous code point boundary
 * and fetch the code point that was passed over.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * When the unit just before the offset is a trail surrogate and it is preceded
 * (not before start) by a lead surrogate, both units are consumed and combined
 * into a supplementary code point.
 * For any other surrogate (a lead surrogate, or a trail surrogate without such
 * a lead) c is set to U+FFFD and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(!(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1]))) { \
            /* unpaired surrogate */ \
            (c)=0xfffd; \
        } else { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset back to the previous code point boundary.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16: after stepping back one unit,
 * a trail surrogate there causes one more unconditional step back.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { --(i); } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset back to the previous code point boundary.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * a second unit is skipped only if the first one is a trail surrogate, the
 * offset is still after start, and the unit in front is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* one unit is always stepped over */ \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        if((i)>(start)) { \
            if(U16_IS_LEAD((s)[(i)-1])) { \
                --(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset backward by n code points,
 * i.e., from one code point boundary to the n-th one before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    /* n is evaluated once; a non-positive count does nothing */ \
    for(; __N>0; --__N) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Move the string offset backward by n code points,
 * i.e., from one code point boundary to the n-th one before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * iteration stops early once the offset is no longer greater than start.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    for(; __N>0 && (i)>(start); --__N) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset forward to the boundary after a code point.
 * When the unit just before the offset is a lead surrogate, the offset is
 * incremented by one; in every other case it is left alone.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { ++(i); } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Snap a random-access offset forward to the boundary after a code point.
 * The offset is incremented only when it lies strictly between start and the
 * end of the string and splits a lead/trail surrogate pair.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i)) { \
        /* a negative length means "NUL-terminated", so no upper bound check */ \
        if((i)<(length) || (length)<0) { \
            if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
#endif
|
||||
@@ -1,882 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf8.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 8-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* internal definitions ----------------------------------------------------- */
|
||||
|
||||
/**
 * Number of trail bytes that follow a given UTF-8 lead byte.
 * Yields 0 for 0..0xc1 as well as for 0xf5..0xff, i.e. for every byte
 * that U8_IS_LEAD() rejects; otherwise 1, 2 or 3.
 * leadByte might be evaluated multiple times.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @internal
 */
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (!U8_IS_LEAD(leadByte) ? 0 : \
        1+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
|
||||
|
||||
/**
 * Number of trail bytes that follow the lead byte of a valid UTF-8 sequence.
 * Yields 0 for 0..0xc1. Undefined for 0xf5..0xff.
 * The count is the number of thresholds (0xc2, 0xe0, 0xf0) the byte reaches.
 * leadByte might be evaluated multiple times.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @internal
 */
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    (((uint8_t)(leadByte)>=0xf0)+ \
     ((uint8_t)(leadByte)>=0xe0)+ \
     ((uint8_t)(leadByte)>=0xc2))
|
||||
|
||||
/**
 * Strip the length marker bits from a UTF-8 lead byte in place, keeping only
 * the low (6-countTrailBytes) bits that carry code point data.
 * leadByte is assigned to, so it must be a modifiable lvalue.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 * @internal
 */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) \
    ((leadByte)&=((1<<(6-(countTrailBytes)))-1))
|
||||
|
||||
/**
 * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * Lead byte E0..EF bits 3..0 are used as byte index,
 * first trail byte bits 7..5 are used as bit index into that byte.
 *
 * Reading the table: bit 4 stands for a first trail byte 80..9F and bit 5 for A0..BF.
 * Entry 0 (lead E0) is 0x20, so only A0..BF is accepted;
 * entry 13 (lead ED) is 0x10, so only 80..9F is accepted;
 * every other entry is 0x30, accepting the whole range 80..BF.
 * @see U8_IS_VALID_LEAD3_AND_T1
 * @internal
 */
|
||||
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
|
||||
|
||||
/**
 * Internal 3-byte UTF-8 validity check.
 * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
 * Looks up the U8_LEAD3_T1_BITS entry selected by the low four bits of the lead
 * byte and tests the bit selected by the top three bits of the trail byte.
 * @internal
 */
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) \
    ((1<<((uint8_t)(t1)>>5))&U8_LEAD3_T1_BITS[(lead)&0xf])
|
||||
|
||||
/**
 * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * First trail byte bits 7..4 are used as byte index,
 * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
 *
 * Reading the table: only entries 8..B (first trail byte 80..BF) are non-zero.
 * Entry 8 (trail 80..8F) is 0x1E, accepting leads F1..F4;
 * entries 9..B (trail 90..BF) are 0x0F, accepting leads F0..F3.
 * @see U8_IS_VALID_LEAD4_AND_T1
 * @internal
 */
|
||||
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
|
||||
|
||||
/**
 * Internal 4-byte UTF-8 validity check.
 * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
 * Looks up the U8_LEAD4_T1_BITS entry selected by the top four bits of the trail
 * byte and tests the bit selected by the low three bits of the lead byte.
 * @internal
 */
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) \
    ((1<<((lead)&7))&U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4])
|
||||
|
||||
/**
|
||||
* Function for handling "next code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
|
||||
|
||||
/**
|
||||
* Function for handling "append code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
|
||||
|
||||
/**
|
||||
* Function for handling "previous code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
|
||||
|
||||
/**
|
||||
* Function for handling "skip backward one code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
 * Is this code unit (byte) a complete code point on its own (US-ASCII 0..0x7f)?
 * True exactly when bit 7 of the byte is clear.
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_SINGLE(c) (0==((c)&0x80))
|
||||
|
||||
/**
 * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
 * Implemented as a single unsigned range check: subtracting 0xc2 and
 * truncating to uint8_t maps 0xC2..0xF4 onto 0..0x32 (0x32=0xf4-0xc2),
 * while every other byte value ends up above 0x32.
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_LEAD(c) (0x32>=(uint8_t)((c)-0xc2))
|
||||
|
||||
/**
 * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
 * Implemented as one signed comparison: reinterpreted as int8_t,
 * the bytes 0x80..0xBF are exactly the values below -0x40.
 * @param c 8-bit code unit (byte)
 * @return true or false
 * @stable ICU 2.4
 */
#define U8_IS_TRAIL(c) (-0x40>(int8_t)(c))
|
||||
|
||||
/**
 * Number of code units (bytes) in the UTF-8 encoding of a Unicode code point.
 * The argument is compared as uint32_t, so negative values count as out of range.
 * c might be evaluated multiple times.
 * @param c 32-bit code point
 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
 * @stable ICU 2.4
 */
#define U8_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : \
     (uint32_t)(c)<=0x7ff ? 2 : \
     (uint32_t)(c)<=0xd7ff ? 3 : \
     ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff) ? 0 : \
     (uint32_t)(c)<=0xffff ? 3 : 4)
|
||||
|
||||
/**
 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
 * This matches the largest value that U8_LENGTH() can yield.
 * @return 4
 * @stable ICU 2.4
 */
|
||||
#define U8_MAX_LENGTH 4
|
||||
|
||||
/**
 * Read the code point at a random-access offset without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * of a code point; the macro works on a private copy of the offset, moves
 * that copy back to the lead byte and then reads the whole sequence.
 * The result is undefined if the offset points to an illegal UTF-8
 * byte sequence.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_GET
 * @stable ICU 2.4
 */
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* private copy, so the caller's offset is never modified */ \
    int32_t _u8_get_unsafe_offset=(int32_t)(i); \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_offset); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_offset, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Read the code point at a random-access offset without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * of a code point; the macro works on a private copy of the offset, moves
 * that copy back to the start of the code point and then reads the whole sequence.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to a negative value.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* private copy, so the caller's offset is never modified */ \
    int32_t _u8_get_offset=(i); \
    U8_SET_CP_START(s, start, _u8_get_offset); \
    U8_NEXT(s, _u8_get_offset, length, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Read the code point at a random-access offset without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * of a code point; the macro works on a private copy of the offset, moves
 * that copy back to the start of the code point and then reads the whole sequence.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_GET() if that distinction is important.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_GET
 * @stable ICU 51
 */
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* private copy, so the caller's offset is never modified */ \
    int32_t _u8_get_or_fffd_offset=(i); \
    U8_SET_CP_START(s, start, _u8_get_or_fffd_offset); \
    U8_NEXT_OR_FFFD(s, _u8_get_or_fffd_offset, length, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/**
 * Read the code point that starts at a code point boundary offset
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * The sequence length is taken from the lead byte alone; trail bytes are
 * masked and merged without being validated, so the result is undefined
 * if the offset points to a trail byte or an illegal UTF-8 sequence.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_NEXT
 * @stable ICU 2.4
 */
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        if((c)>=0xf0) { \
            /* lead byte plus three trail bytes */ \
            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
            (i)+=3; \
        } else if((c)>=0xe0) { \
            /* lead byte plus two trail bytes; no (c&0xf) needed because the */ \
            /* cast to (UChar) drops the upper bits after the <<12 */ \
            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
            (i)+=2; \
        } else { \
            /* lead byte plus one trail byte */ \
            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Read the code point that starts at a code point boundary offset
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to a negative value (U_SENTINEL is passed as the substitute).
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U8_NEXT(s, i, length, c) \
    U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
|
||||
|
||||
/**
 * Read the code point that starts at a code point boundary offset
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to U+FFFD (0xfffd is passed as the substitute).
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_NEXT() if that distinction is important.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_NEXT
 * @stable ICU 51
 */
#define U8_NEXT_OR_FFFD(s, i, length, c) \
    U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
|
||||
|
||||
/**
 * Shared body of U8_NEXT() and U8_NEXT_OR_FFFD().
 * Reads one byte and post-increments the offset. For a non-ASCII byte, a single
 * short-circuiting expression fetches, validates and merges the trail bytes,
 * advancing the offset past each byte it accepts and comparing the offset
 * against length before each further read. If any step of that expression
 * fails, c is set to sub and the offset stays after the bytes accepted so far.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset
 * @param length int32_t string length
 * @param c output UChar32 variable
 * @param sub value stored in c for an ill-formed sequence
 * @internal
 */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        uint8_t __t = 0; \
        if(!((i)!=(length) && \
            /* first: everything except the final trail byte */ \
            ((c)>=0xe0 ? \
                ((c)<0xf0 ?  /* three bytes: U+0800..U+FFFF except surrogates */ \
                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
                    (__t&=0x3f, 1) \
                :  /* four bytes: U+10000..U+10FFFF */ \
                    ((c)-=0xf0)<=4 && \
                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
                    (__t=(s)[i]-0x80)<=0x3f) && \
                /* the second-to-last trail byte is valid: merge it */ \
                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
            :  /* two bytes: U+0080..U+07FF */ \
                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
            /* then: the final trail byte */ \
            (__t=(s)[i]-0x80)<=0x3f && \
            ((c)=((c)<<6)|__t, ++(i), 1))) { \
            /* some step above failed: ill-formed sequence */ \
            (c)=(sub); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Append a code point to a string, overwriting 1 to 4 bytes.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) once per byte written.
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s uint8_t * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U8_APPEND
 * @stable ICU 2.4
 */
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<=0x7f) { \
        /* one byte */ \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<=0x7ff) { \
        /* two bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(__uc<=0xffff) { \
        /* three bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        /* four bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Append a code point to a string, overwriting 1 to 4 bytes.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) once per byte written.
 * "Safe" macro, checks for a valid code point.
 * An ASCII code point is written without a capacity check; for any longer
 * encoding the macro first checks that all of its bytes fit below capacity.
 * If the code point is not valid (surrogate or above U+10FFFF) or the bytes
 * do not fit, nothing is written and isError is set to true.
 *
 * @param s uint8_t * string buffer (written to)
 * @param i int32_t string offset, must be i<capacity
 * @param capacity int32_t size of the string buffer
 * @param c UChar32 code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U8_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<=0x7f) { \
        /* one byte */ \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
        /* two bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if((__uc<=0xd7ff || (__uc>=0xe000 && __uc<=0xffff)) && (i)+2<(capacity)) { \
        /* three bytes; the surrogate range d800..dfff is excluded */ \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(__uc>0xffff && __uc<=0x10ffff && (i)+3<(capacity)) { \
        /* four bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        /* invalid code point, or not enough room */ \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset forward over exactly one code point.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8: the step width is derived from
 * the byte at the offset alone, and the trail bytes are not inspected.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_FWD_1
 * @stable ICU 2.4
 */
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i])+1; \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Step the string offset forward over exactly one code point.
 * (Post-incrementing iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries:
 * the first byte is always consumed, and each following byte is consumed only
 * if the offset has not reached the length and the byte is acceptable at its
 * position in the sequence.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @see U8_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __b=(s)[(i)++]; \
    if(U8_IS_LEAD(__b) && (i)!=(length)) { \
        uint8_t __t1=(s)[i]; \
        if(__b<0xe0) { \
            /* two-byte lead: one trail byte */ \
            if(U8_IS_TRAIL(__t1)) { \
                ++(i); \
            } \
        } else if(__b<0xf0) { \
            /* three-byte lead: the first trail byte is range-checked against the lead */ \
            if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } else { \
            /* four-byte lead (__b>=0xf0) */ \
            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U8_FWD_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); /* n is evaluated once and counted down in a local */ \
|
||||
while(__N>0) { /* no end-of-string test: the loop runs exactly n times for n>0 */ \
|
||||
U8_FWD_1_UNSAFE(s, i); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i int32_t string offset, must be i<length
|
||||
* @param length int32_t string length
|
||||
* @param n number of code points to skip
|
||||
* @see U8_FWD_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); /* n is evaluated once and counted down in a local */ \
|
||||
while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { /* stop early at length, or at a NUL byte when length is negative */ \
|
||||
U8_FWD_1(s, i, length); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to a UTF-8 trail byte,
|
||||
* then the offset is moved backward to the corresponding lead byte.
|
||||
* Otherwise, it is not modified.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @see U8_SET_CP_START
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
while(U8_IS_TRAIL((s)[i])) { --(i); } /* steps back over every trail byte; no lower bound on i is checked */ \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to a UTF-8 trail byte,
|
||||
* then the offset is moved backward to the corresponding lead byte.
|
||||
* Otherwise, it is not modified.
|
||||
*
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<=i
|
||||
* @see U8_SET_CP_START_UNSAFE
|
||||
* @see U8_TRUNCATE_IF_INCOMPLETE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U8_IS_TRAIL((s)[(i)])) { /* s[i] is always read; any non-trail byte leaves i unchanged */ \
|
||||
(i)=utf8_back1SafeBody(s, start, (i)); /* the helper returns the adjusted offset, bounded by start */ \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* If the string ends with a UTF-8 byte sequence that is valid so far
|
||||
* but incomplete, then reduce the length of the string to end before
|
||||
* the lead byte of that incomplete sequence.
|
||||
* For example, if the string ends with E1 80, the length is reduced by 2.
|
||||
*
|
||||
* In all other cases (the string ends with a complete sequence, or it is not
|
||||
* possible for any further trail byte to extend the trailing sequence)
|
||||
* the length remains unchanged.
|
||||
*
|
||||
* Useful for processing text split across multiple buffers
|
||||
* (save the incomplete sequence for later)
|
||||
* and for optimizing iteration
|
||||
* (check for string length only once per character).
|
||||
*
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
* Unlike U8_SET_CP_START(), this macro never reads s[length].
|
||||
*
|
||||
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param length int32_t string length (usually start<=length)
|
||||
* @see U8_SET_CP_START
|
||||
* @stable ICU 61
|
||||
*/
|
||||
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if((length)>(start)) { \
|
||||
uint8_t __b1=(s)[(length)-1]; /* (s) is parenthesized so that an expression argument is indexed as a whole */ \
|
||||
if(U8_IS_SINGLE(__b1)) { \
|
||||
/* common ASCII character */ \
|
||||
} else if(U8_IS_LEAD(__b1)) { \
|
||||
--(length); /* string ends with a bare lead byte: cut it off */ \
|
||||
} else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
|
||||
uint8_t __b2=(s)[(length)-2]; \
|
||||
if(0xe0<=__b2 && __b2<=0xf4) { /* 3- or 4-byte lead followed by one trail byte so far */ \
|
||||
if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
|
||||
U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
|
||||
(length)-=2; \
|
||||
} \
|
||||
} else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
|
||||
uint8_t __b3=(s)[(length)-3]; \
|
||||
if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { /* 4-byte lead followed by two trail bytes so far */ \
|
||||
(length)-=3; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a multi-byte sequence, then the macro will read
|
||||
* the whole sequence.
|
||||
* If the offset is behind a lead byte, then that itself
|
||||
* will be returned as the code point.
|
||||
* The result is undefined if the offset is behind an illegal UTF-8 sequence.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U8_PREV
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(uint8_t)(s)[--(i)]; \
|
||||
if(U8_IS_TRAIL(c)) { \
|
||||
uint8_t __b, __count=1, __shift=6; \
|
||||
\
|
||||
/* c is a trail byte: keep its low 6 bits, then walk backward collecting 6 bits per byte */ \
|
||||
(c)&=0x3f; \
|
||||
for(;;) { /* no start-of-string check; terminates only when a byte >=0xc0 is found */ \
|
||||
__b=(s)[--(i)]; \
|
||||
if(__b>=0xc0) { /* treated as the lead byte: mask it according to the trail count and finish */ \
|
||||
U8_MASK_LEAD_BYTE(__b, __count); \
|
||||
(c)|=(UChar32)__b<<__shift; \
|
||||
break; \
|
||||
} else { /* another trail byte: merge its 6 payload bits at the current shift */ \
|
||||
(c)|=(UChar32)(__b&0x3f)<<__shift; \
|
||||
++__count; \
|
||||
__shift+=6; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a multi-byte sequence, then the macro will read
|
||||
* the whole sequence.
|
||||
* If the offset is behind a lead byte, then that itself
|
||||
* will be returned as the code point.
|
||||
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @param c output UChar32 variable, set to <0 in case of an error
|
||||
* @see U8_PREV_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(uint8_t)(s)[--(i)]; /* i is pre-decremented, then that byte is read */ \
|
||||
if(!U8_IS_SINGLE(c)) { /* anything that is not a single-byte code point goes to the helper */ \
|
||||
(c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); /* cast covers the whole (s) expression; the -1 mode presumably selects negative-on-error - confirm against utf8_prevCharSafeBody */ \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a multi-byte sequence, then the macro will read
|
||||
* the whole sequence.
|
||||
* If the offset is behind a lead byte, then that itself
|
||||
* will be returned as the code point.
|
||||
* If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
|
||||
*
|
||||
* This macro does not distinguish between a real U+FFFD in the text
|
||||
* and U+FFFD returned for an ill-formed sequence.
|
||||
* Use U8_PREV() if that distinction is important.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @param c output UChar32 variable, set to U+FFFD in case of an error
|
||||
* @see U8_PREV
|
||||
* @stable ICU 51
|
||||
*/
|
||||
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
(c)=(uint8_t)(s)[--(i)]; /* i is pre-decremented, then that byte is read */ \
|
||||
if(!U8_IS_SINGLE(c)) { /* anything that is not a single-byte code point goes to the helper */ \
|
||||
(c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); /* cast covers the whole (s) expression; the -3 mode presumably selects U+FFFD-on-error - confirm against utf8_prevCharSafeBody */ \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @see U8_BACK_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
while(U8_IS_TRAIL((s)[--(i)])) {} /* decrement first, then keep going while the byte is a trail byte; no lower bound is checked */ \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @see U8_BACK_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if(U8_IS_TRAIL((s)[--(i)])) { /* i is decremented unconditionally; only a trail byte needs further work */ \
|
||||
(i)=utf8_back1SafeBody(s, start, (i)); /* the helper returns the adjusted offset, bounded by start */ \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U8_BACK_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); /* n is evaluated once and counted down in a local */ \
|
||||
while(__N>0) { /* no start-of-string test: the loop runs exactly n times for n>0 */ \
|
||||
U8_BACK_1_UNSAFE(s, i); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t index of the start of the string
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @param n number of code points to skip
|
||||
* @see U8_BACK_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t __N=(n); /* n is evaluated once and counted down in a local */ \
|
||||
while(__N>0 && (i)>(start)) { /* stop early once i has reached start */ \
|
||||
U8_BACK_1(s, start, i); \
|
||||
--__N; \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind a partial multi-byte sequence,
|
||||
* then the offset is incremented to behind the whole sequence.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @see U8_SET_CP_LIMIT
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
U8_BACK_1_UNSAFE(s, i); /* first move back to the start of the code point that precedes i */ \
|
||||
U8_FWD_1_UNSAFE(s, i); /* then move forward past that whole code point */ \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind a partial multi-byte sequence,
|
||||
* then the offset is incremented to behind the whole sequence.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<=i<=length
|
||||
* @param length int32_t string length
|
||||
* @see U8_SET_CP_LIMIT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
if((start)<(i) && ((i)<(length) || (length)<0)) { /* nothing to do when i is at start or, for a non-negative length, at or past length */ \
|
||||
U8_BACK_1(s, start, i); /* back to the start of the code point that precedes i */ \
|
||||
U8_FWD_1(s, i, length); /* forward past that whole code point */ \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
#endif
|
||||
@@ -1,732 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1996-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* FILE NAME : UTYPES.H (formerly ptypes.h)
|
||||
*
|
||||
* Date Name Description
|
||||
* 12/11/96 helena Creation.
|
||||
* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
|
||||
* uint8, uint16, and uint32.
|
||||
* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
|
||||
* well as C++.
|
||||
* Modified to use memcpy() for uprv_arrayCopy() fns.
|
||||
* 04/14/97 aliu Added TPlatformUtilities.
|
||||
* 05/07/97 aliu Added import/export specifiers (replacing the old
|
||||
* broken EXT_CLASS). Added version number for our
|
||||
* code. Cleaned up header.
|
||||
* 6/20/97 helena Java class name change.
|
||||
* 08/11/98 stephen UErrorCode changed from typedef to enum
|
||||
* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
|
||||
* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
|
||||
* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
|
||||
* 04/20/99 stephen Cleaned up & reworked for autoconf.
|
||||
* Renamed to utypes.h.
|
||||
* 05/05/99 stephen Changed to use <inttypes.h>
|
||||
* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UTYPES_H
|
||||
#define UTYPES_H
|
||||
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "unicode/uconfig.h"
|
||||
#include <float.h>
|
||||
|
||||
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief Basic definitions for ICU, for both C and C++ APIs
|
||||
*
|
||||
* This file defines basic types, constants, and enumerations directly or
|
||||
* indirectly by including other header files, especially utf.h for the
|
||||
* basic character and string definitions and umachine.h for consistent
|
||||
* integer and other types.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \def U_SHOW_CPLUSPLUS_API
|
||||
* @internal
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
# ifndef U_SHOW_CPLUSPLUS_API
|
||||
# define U_SHOW_CPLUSPLUS_API 1
|
||||
# endif
|
||||
#else
|
||||
# undef U_SHOW_CPLUSPLUS_API
|
||||
# define U_SHOW_CPLUSPLUS_API 0
|
||||
#endif
|
||||
|
||||
/** @{ API visibility control */
|
||||
|
||||
/**
|
||||
* \def U_HIDE_DRAFT_API
|
||||
* Define this to 1 to request that draft API be "hidden"
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* \def U_HIDE_INTERNAL_API
|
||||
* Define this to 1 to request that internal API be "hidden"
|
||||
* @internal
|
||||
*/
|
||||
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
|
||||
#define U_HIDE_DRAFT_API 1
|
||||
#endif
|
||||
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
|
||||
#define U_HIDE_INTERNAL_API 1
|
||||
#endif
|
||||
|
||||
/** @} */
|
||||
|
||||
/*===========================================================================*/
|
||||
/* ICUDATA naming scheme */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_ICUDATA_TYPE_LETTER
|
||||
*
|
||||
* This is a platform-dependent string containing one letter:
|
||||
* - b for big-endian, ASCII-family platforms
|
||||
* - l for little-endian, ASCII-family platforms
|
||||
* - e for big-endian, EBCDIC-family platforms
|
||||
* This letter is part of the common data file name.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_ICUDATA_TYPE_LITLETTER
|
||||
* The non-string form of U_ICUDATA_TYPE_LETTER
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if U_CHARSET_FAMILY
|
||||
# if U_IS_BIG_ENDIAN
|
||||
/* EBCDIC - should always be BE */
|
||||
# define U_ICUDATA_TYPE_LETTER "e"
|
||||
# define U_ICUDATA_TYPE_LITLETTER e
|
||||
# else
|
||||
# error "Don't know what to do with little endian EBCDIC!"
|
||||
# define U_ICUDATA_TYPE_LETTER "x"
|
||||
# define U_ICUDATA_TYPE_LITLETTER x
|
||||
# endif
|
||||
#else
|
||||
# if U_IS_BIG_ENDIAN
|
||||
/* Big-endian ASCII */
|
||||
# define U_ICUDATA_TYPE_LETTER "b"
|
||||
# define U_ICUDATA_TYPE_LITLETTER b
|
||||
# else
|
||||
/* Little-endian ASCII */
|
||||
# define U_ICUDATA_TYPE_LETTER "l"
|
||||
# define U_ICUDATA_TYPE_LITLETTER l
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* A single string literal containing the icudata stub name, e.g. 'icudt18e' for
|
||||
* ICU 1.8.x on EBCDIC.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
|
||||
#define U_USE_USRDATA 0 /**< @internal */
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* U_ICUDATA_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
|
||||
* Defined as a literal, not a string.
|
||||
* Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
|
||||
* from the corresponding macro invocation, _before_ other macro substitutions.
|
||||
* Nested \#defines are needed to get the actual version numbers rather than
|
||||
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
|
||||
* The net result will be something of the form
|
||||
* \#define U_ICUDATA_ENTRY_POINT icudt19_dat
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
|
||||
* @internal
|
||||
*/
|
||||
#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
|
||||
|
||||
/**
|
||||
* Do not use.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DEF_ICUDATA_ENTRY_POINT
|
||||
/* affected by symbol renaming. See platform.h */
|
||||
#ifndef U_LIB_SUFFIX_C_NAME
|
||||
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
|
||||
#else
|
||||
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
|
||||
#endif
|
||||
#endif
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
* \def NULL
|
||||
* Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifndef NULL
|
||||
#ifdef __cplusplus
|
||||
#define NULL nullptr
|
||||
#else
|
||||
#define NULL ((void *)0)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Calendar/TimeZone data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Date and Time data type.
|
||||
* This is a primitive data type that holds the date and time
|
||||
* as the number of milliseconds since 1970-jan-01, 00:00 UTC.
|
||||
* UTC leap seconds are ignored.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef double UDate;
|
||||
|
||||
/** The number of milliseconds per second @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_SECOND (1000)
|
||||
/** The number of milliseconds per minute @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_MINUTE (60000)
|
||||
/** The number of milliseconds per hour @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_HOUR (3600000)
|
||||
/** The number of milliseconds per day @stable ICU 2.0 */
|
||||
#define U_MILLIS_PER_DAY (86400000)
|
||||
|
||||
/**
|
||||
* Maximum UDate value
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_DATE_MAX DBL_MAX
|
||||
|
||||
/**
|
||||
* Minimum UDate value
|
||||
* @stable ICU 4.8
|
||||
*/
|
||||
#define U_DATE_MIN -U_DATE_MAX
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Shared library/DLL import-export API control */
|
||||
/*===========================================================================*/
|
||||
|
||||
/*
|
||||
* Control of symbol import/export.
|
||||
* ICU is separated into three libraries.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMBINED_IMPLEMENTATION
|
||||
* Set to export library symbols from inside the ICU library
|
||||
* when all of ICU is in a single library.
|
||||
* This can be set as a compiler option while building ICU, and it
|
||||
* needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_DATA_API
|
||||
* Set to export library symbols from inside the stubdata library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_COMMON_API
|
||||
* Set to export library symbols from inside the common library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_I18N_API
|
||||
* Set to export library symbols from inside the i18n library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUT_API
|
||||
* Set to export library symbols from inside the layout engine library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_LAYOUTEX_API
|
||||
* Set to export library symbols from inside the layout extensions library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_IO_API
|
||||
* Set to export library symbols from inside the ustdio library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_TOOLUTIL_API
|
||||
* Set to export library symbols from inside the toolutil library,
|
||||
* and to import them from outside.
|
||||
* @stable ICU 3.4
|
||||
*/
|
||||
|
||||
#ifdef U_IN_DOXYGEN
|
||||
// This definition is required when generating the API docs.
|
||||
#define U_COMBINED_IMPLEMENTATION 1
|
||||
#endif
|
||||
|
||||
#if defined(U_COMBINED_IMPLEMENTATION)
|
||||
#define U_DATA_API U_EXPORT
|
||||
#define U_COMMON_API U_EXPORT
|
||||
#define U_I18N_API U_EXPORT
|
||||
#define U_LAYOUT_API U_EXPORT
|
||||
#define U_LAYOUTEX_API U_EXPORT
|
||||
#define U_IO_API U_EXPORT
|
||||
#define U_TOOLUTIL_API U_EXPORT
|
||||
#elif defined(U_STATIC_IMPLEMENTATION)
|
||||
#define U_DATA_API
|
||||
#define U_COMMON_API
|
||||
#define U_I18N_API
|
||||
#define U_LAYOUT_API
|
||||
#define U_LAYOUTEX_API
|
||||
#define U_IO_API
|
||||
#define U_TOOLUTIL_API
|
||||
#elif defined(U_COMMON_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_EXPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_I18N_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_EXPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_LAYOUT_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_EXPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_LAYOUTEX_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_EXPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_IO_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_EXPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#elif defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_EXPORT
|
||||
#else
|
||||
#define U_DATA_API U_IMPORT
|
||||
#define U_COMMON_API U_IMPORT
|
||||
#define U_I18N_API U_IMPORT
|
||||
#define U_LAYOUT_API U_IMPORT
|
||||
#define U_LAYOUTEX_API U_IMPORT
|
||||
#define U_IO_API U_IMPORT
|
||||
#define U_TOOLUTIL_API U_IMPORT
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_STANDARD_CPP_NAMESPACE
|
||||
* Control of C++ Namespace
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
#define U_STANDARD_CPP_NAMESPACE ::
|
||||
#else
|
||||
#define U_STANDARD_CPP_NAMESPACE
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* UErrorCode */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Standard ICU4C error code type, a substitute for exceptions.
|
||||
*
|
||||
* Initialize the UErrorCode with U_ZERO_ERROR, and check for success or
|
||||
* failure using U_SUCCESS() or U_FAILURE():
|
||||
*
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* // call ICU API that needs an error code parameter.
|
||||
* if (U_FAILURE(errorCode)) {
|
||||
* // An error occurred. Handle it here.
|
||||
* }
|
||||
*
|
||||
* C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a
|
||||
* suitable subclass.
|
||||
*
|
||||
* For more information, see:
|
||||
* http://icu-project.org/userguide/conventions
|
||||
*
|
||||
* Note: By convention, ICU functions that take a reference (C++) or a pointer
|
||||
* (C) to a UErrorCode first test:
|
||||
*
|
||||
* if (U_FAILURE(errorCode)) { return immediately; }
|
||||
*
|
||||
* so that in a chain of such functions the first one that sets an error code
|
||||
* causes the following ones to not perform any operations.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
typedef enum UErrorCode {
|
||||
/* The ordering of U_ERROR_WARNING_START vs. U_USING_FALLBACK_WARNING looks weird
|
||||
* and is that way because VC++ debugger displays first encountered constant,
|
||||
* which is not what the code is used for
|
||||
*/
|
||||
|
||||
U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
|
||||
|
||||
U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
|
||||
|
||||
U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
|
||||
|
||||
U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
|
||||
|
||||
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
|
||||
|
||||
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
|
||||
|
||||
U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
|
||||
|
||||
U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
|
||||
|
||||
U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
|
||||
|
||||
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal UErrorCode warning value.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_ERROR_WARNING_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
U_ZERO_ERROR = 0, /**< No error, no warning. */
|
||||
|
||||
U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
|
||||
U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
|
||||
U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
|
||||
U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
|
||||
U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
|
||||
U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
|
||||
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
|
||||
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
|
||||
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
|
||||
U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
|
||||
U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
|
||||
U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
|
||||
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
|
||||
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
|
||||
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
|
||||
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
|
||||
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
|
||||
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
|
||||
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
|
||||
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
|
||||
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
|
||||
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
|
||||
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
|
||||
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
|
||||
It is very possible that a circular alias definition has occurred */
|
||||
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
|
||||
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
|
||||
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
|
||||
U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
|
||||
U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
|
||||
U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* The input is impractically long for an operation.
|
||||
* It is rejected because it may lead to problems such as excessive
|
||||
* processing time, stack depth, or heap memory requirements.
|
||||
*
|
||||
* @draft ICU 68
|
||||
*/
|
||||
U_INPUT_TOO_LONG_ERROR = 31,
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest standard error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_STANDARD_ERROR_LIMIT = 32,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10000-0x100ff are reserved for Transliterator.
|
||||
*/
|
||||
U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
|
||||
U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
|
||||
U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
|
||||
U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
|
||||
U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
|
||||
U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
|
||||
U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
|
||||
U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
|
||||
U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
|
||||
U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
|
||||
U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
|
||||
U_MISSING_OPERATOR, /**< A rule contains no operator */
|
||||
U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
|
||||
U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
|
||||
U_MULTIPLE_CURSORS, /**< More than one cursor */
|
||||
U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
|
||||
U_TRAILING_BACKSLASH, /**< A dangling backslash */
|
||||
U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
|
||||
U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
|
||||
U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
|
||||
U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
|
||||
U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
|
||||
U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
|
||||
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
|
||||
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
|
||||
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
|
||||
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
|
||||
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
|
||||
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
|
||||
U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
|
||||
U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
|
||||
U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
|
||||
U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
|
||||
U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal Transliterator error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_PARSE_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10100-0x101ff are reserved for the formatting API.
|
||||
*/
|
||||
U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
|
||||
U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
|
||||
U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
|
||||
U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
|
||||
U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
|
||||
U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
|
||||
U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
|
||||
U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
|
||||
U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
|
||||
U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
|
||||
U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
|
||||
U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
|
||||
U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
|
||||
U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
|
||||
U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
|
||||
U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
|
||||
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
|
||||
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
|
||||
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
|
||||
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
|
||||
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
|
||||
U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal formatting API error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_FMT_PARSE_ERROR_LIMIT = 0x10114,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10200-0x102ff are reserved for BreakIterator.
|
||||
*/
|
||||
U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
|
||||
U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
|
||||
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of an escaped char in a rule. */
|
||||
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
|
||||
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
|
||||
U_BRK_UNCLOSED_SET, /**< A UnicodeSet within an RBBI rule is missing a closing ']'. */
|
||||
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
|
||||
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
|
||||
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
|
||||
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
|
||||
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
|
||||
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
|
||||
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
|
||||
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
|
||||
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal BreakIterator error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_BRK_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors.
|
||||
*/
|
||||
U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
|
||||
U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
|
||||
U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
|
||||
U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
|
||||
U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
|
||||
U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
|
||||
U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
|
||||
U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
|
||||
U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
|
||||
U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
|
||||
U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
|
||||
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
|
||||
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
|
||||
U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
|
||||
U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
|
||||
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
|
||||
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
|
||||
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
|
||||
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
|
||||
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */
|
||||
U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal regular expression error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes.
|
||||
*/
|
||||
U_IDNA_PROHIBITED_ERROR=0x10400,
|
||||
U_IDNA_ERROR_START=0x10400,
|
||||
U_IDNA_UNASSIGNED_ERROR,
|
||||
U_IDNA_CHECK_BIDI_ERROR,
|
||||
U_IDNA_STD3_ASCII_RULES_ERROR,
|
||||
U_IDNA_ACE_PREFIX_ERROR,
|
||||
U_IDNA_VERIFICATION_ERROR,
|
||||
U_IDNA_LABEL_TOO_LONG_ERROR,
|
||||
U_IDNA_ZERO_LENGTH_LABEL_ERROR,
|
||||
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal IDNA error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_IDNA_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
/*
|
||||
* Aliases for StringPrep
|
||||
*/
|
||||
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
|
||||
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
|
||||
|
||||
/*
|
||||
* Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes.
|
||||
*/
|
||||
U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
|
||||
U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
|
||||
U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal plug-in error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_PLUGIN_ERROR_LIMIT,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UErrorCode;
|
||||
|
||||
/* Use the following to determine if an UErrorCode represents */
|
||||
/* operational success or failure. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static
|
||||
inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
|
||||
#else
|
||||
/**
|
||||
* Does the error code indicate success?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
|
||||
/**
|
||||
* Does the error code indicate a failure?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Return a string for a UErrorCode value.
|
||||
* The string will be the same as the name of the error code constant
|
||||
* in the UErrorCode enum above.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2
|
||||
u_errorName(UErrorCode code);
|
||||
|
||||
|
||||
#endif /* _UTYPES */
|
||||
@@ -1,198 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2000-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* file name: uvernum.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* Created by: Vladimir Weinstein
|
||||
* Updated by: Steven R. Loomis
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: definitions of ICU version numbers
|
||||
*
|
||||
* This file is included by uversion.h and other files. This file contains only
|
||||
* macros and definitions. The actual version numbers are defined here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* IMPORTANT: When updating version, the following things need to be done:
|
||||
* source/common/unicode/uvernum.h - this file: update major, minor,
|
||||
* patchlevel, suffix, version, short version constants, namespace,
|
||||
* renaming macro, and copyright
|
||||
*
|
||||
* The following files need to be updated as well, which can be done
|
||||
* by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
|
||||
*
|
||||
*
|
||||
* source/common/common_uwp.vcxproj
|
||||
* source/common/common.vcxproj - update 'Output file name' on the link tab so
|
||||
* that it contains the new major/minor combination
|
||||
* source/i18n/i18n.vcxproj - same as for the common.vcxproj
|
||||
* source/i18n/i18n_uwp.vcxproj - same as for the common_uwp.vcxproj
|
||||
* source/layoutex/layoutex.vcproj - same
|
||||
* source/stubdata/stubdata.vcproj - same as for the common.vcxproj
|
||||
* source/io/io.vcproj - same as for the common.vcxproj
|
||||
* source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
|
||||
* the new major/minor combination and the Unicode version.
|
||||
*/
|
||||
|
||||
#ifndef UVERNUM_H
|
||||
#define UVERNUM_H
|
||||
|
||||
/** The standard copyright notice that gets compiled into each library.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_COPYRIGHT_STRING \
|
||||
" Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html "
|
||||
|
||||
/** The current ICU major version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 69
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
|
||||
|
||||
/** The current ICU build level version as an integer.
|
||||
* This value is for use by ICU clients. It defaults to 0.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
|
||||
#define U_ICU_VERSION_BUILDLEVEL_NUM 0
|
||||
#endif
|
||||
|
||||
/** Glued version suffix for renamers
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _69
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* \def U_DEF_ICU_ENTRY_POINT_RENAME
|
||||
* @internal
|
||||
*/
|
||||
/** Glued version suffix function for renamers
|
||||
* This value will change in the subsequent releases of ICU.
|
||||
* If a custom suffix (such as matching library suffixes) is desired, this can be modified.
|
||||
* Note that if present, platform.h may contain an earlier definition of this macro.
|
||||
* \def U_ICU_ENTRY_POINT_RENAME
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
/**
|
||||
* Disable the version suffix. Use the custom suffix if exists.
|
||||
* \def U_DISABLE_VERSION_SUFFIX
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_DISABLE_VERSION_SUFFIX
|
||||
#define U_DISABLE_VERSION_SUFFIX 0
|
||||
#endif
|
||||
|
||||
#ifndef U_ICU_ENTRY_POINT_RENAME
|
||||
#ifdef U_HAVE_LIB_SUFFIX
|
||||
# if !U_DISABLE_VERSION_SUFFIX
|
||||
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
|
||||
# else
|
||||
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME)
|
||||
# endif
|
||||
#else
|
||||
# if !U_DISABLE_VERSION_SUFFIX
|
||||
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
|
||||
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
|
||||
# else
|
||||
# define U_ICU_ENTRY_POINT_RENAME(x) x
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** The current ICU library version as a dotted-decimal string. The patchlevel
|
||||
* only appears in this string if it is non-zero.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "69.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
* This value will change in subsequent releases of ICU.
|
||||
*
|
||||
* Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
|
||||
* into one string without dots ("48").
|
||||
* Since ICU 49, it is the double-digit major ICU version number.
|
||||
* See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "69"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "69.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
* ICU collation framework version information
|
||||
* Version info that can be obtained from a collator is affected by these
|
||||
* numbers in a secret and magic way. Please use the collator version as a whole
|
||||
*===========================================================================
|
||||
*/
|
||||
|
||||
/**
|
||||
* Collation runtime version (sort key generator, strcoll).
|
||||
* If the version is different, sort keys for the same string could be different.
|
||||
* This value may change in subsequent releases of ICU.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define UCOL_RUNTIME_VERSION 9
|
||||
|
||||
/**
|
||||
* Collation builder code version.
|
||||
* When this is different, the same tailoring might result
|
||||
* in assigning different collation elements to code points.
|
||||
* This value may change in subsequent releases of ICU.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define UCOL_BUILDER_VERSION 9
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Constant 1.
|
||||
* This was intended to be the version of collation tailorings,
|
||||
* but instead the tailoring data carries a version number.
|
||||
* @deprecated ICU 54
|
||||
*/
|
||||
#define UCOL_TAILORINGS_VERSION 1
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#endif
|
||||
@@ -1,187 +0,0 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* file name: uversion.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* Created by: Vladimir Weinstein
|
||||
*
|
||||
* Gets included by utypes.h and Windows .rc files
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: API for accessing ICU version numbers.
|
||||
*/
|
||||
/*===========================================================================*/
|
||||
/* Main ICU version information */
|
||||
/*===========================================================================*/
|
||||
|
||||
#ifndef UVERSION_H
|
||||
#define UVERSION_H
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
|
||||
/* Actual version info lives in uvernum.h */
|
||||
#include "unicode/uvernum.h"
|
||||
|
||||
/** Maximum length of the copyright string.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_COPYRIGHT_STRING_LENGTH 128
|
||||
|
||||
/** An ICU version consists of up to 4 numbers from 0..255.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_MAX_VERSION_LENGTH 4
|
||||
|
||||
/** In a string, ICU version fields are delimited by dots.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_VERSION_DELIMITER '.'
|
||||
|
||||
/** The maximum length of an ICU version string.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_MAX_VERSION_STRING_LENGTH 20
|
||||
|
||||
/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
|
||||
* To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
|
||||
|
||||
/*===========================================================================*/
|
||||
/* C++ namespace if supported. Versioned unless versioning is disabled. */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* Define C++ namespace symbols. */
|
||||
#ifdef __cplusplus
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_BEGIN
|
||||
* This is used to begin a declaration of a public ICU C++ API within
|
||||
* versioned-ICU-namespace block.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_END
|
||||
* This is used to end a declaration of a public ICU C++ API.
|
||||
* It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_USE
|
||||
* This is used to specify that the rest of the code uses the
|
||||
* public ICU C++ API namespace.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_NAMESPACE_QUALIFIER
|
||||
* This is used to qualify that a function or class is part of
|
||||
* the public ICU C++ API namespace.
|
||||
*
|
||||
* This macro is unnecessary since ICU 49 requires namespace support.
|
||||
* You can just use "icu::" instead.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
# if U_DISABLE_RENAMING
|
||||
# define U_ICU_NAMESPACE icu
|
||||
namespace U_ICU_NAMESPACE { }
|
||||
# else
|
||||
# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
|
||||
namespace U_ICU_NAMESPACE { }
|
||||
namespace icu = U_ICU_NAMESPACE;
|
||||
# endif
|
||||
|
||||
# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
|
||||
# define U_NAMESPACE_END }
|
||||
# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
|
||||
# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
|
||||
|
||||
# ifndef U_USING_ICU_NAMESPACE
|
||||
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
|
||||
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
|
||||
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
# define U_USING_ICU_NAMESPACE 0
|
||||
# else
|
||||
# define U_USING_ICU_NAMESPACE 0
|
||||
# endif
|
||||
# endif
|
||||
# if U_USING_ICU_NAMESPACE
|
||||
U_NAMESPACE_USE
|
||||
# endif
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*===========================================================================*/
|
||||
/* General version helper functions. Definitions in putil.c */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Parse a string with dotted-decimal version information and
|
||||
* fill in a UVersionInfo structure with the result.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray The destination structure for the version information.
|
||||
* @param versionString A string with dotted-decimal version information,
|
||||
* with up to four non-negative number fields with
|
||||
* values of up to 255 each.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_versionFromString(UVersionInfo versionArray, const char *versionString);
|
||||
|
||||
/**
|
||||
* Parse a Unicode string with dotted-decimal version information and
|
||||
* fill in a UVersionInfo structure with the result.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray The destination structure for the version information.
|
||||
* @param versionString A Unicode string with dotted-decimal version
|
||||
* information, with up to four non-negative number
|
||||
* fields with values of up to 255 each.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
|
||||
|
||||
|
||||
/**
|
||||
* Write a string with dotted-decimal version information according
|
||||
* to the input UVersionInfo.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray The version information to be written as a string.
|
||||
* @param versionString A string buffer that will be filled in with
|
||||
* a string corresponding to the numeric version
|
||||
* information in versionArray.
|
||||
* The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_versionToString(const UVersionInfo versionArray, char *versionString);
|
||||
|
||||
/**
|
||||
* Gets the ICU release version. The version array stores the version information
|
||||
* for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
|
||||
* Definition of this function lives in putil.c
|
||||
*
|
||||
* @param versionArray the version # information, the result will be filled in
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_getVersion(UVersionInfo versionArray);
|
||||
#endif
|
||||
@@ -3,59 +3,8 @@ aff_data.cxx aff_data.hxx
|
||||
checker.cxx checker.hxx
|
||||
suggester.cxx suggester.hxx
|
||||
dictionary.cxx dictionary.hxx
|
||||
finder.cxx finder.hxx
|
||||
unicode.hxx
|
||||
utils.cxx utils.hxx
|
||||
structures.hxx)
|
||||
|
||||
add_library(Nuspell::nuspell ALIAS nuspell)
|
||||
|
||||
#include(GenerateExportHeader)
|
||||
#generate_export_header(nuspell)
|
||||
|
||||
#set(nuspell_headers aff_data.hxx checker.hxx suggester.hxx dictionary.hxx
|
||||
# finder.hxx structures.hxx unicode.hxx
|
||||
# ${CMAKE_CURRENT_BINARY_DIR}/nuspell_export.h)
|
||||
#[[set_target_properties(nuspell PROPERTIES
|
||||
PUBLIC_HEADER "${nuspell_headers}"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
SOVERSION ${PROJECT_VERSION_MAJOR}f
|
||||
CXX_VISIBILITY_PRESET hidden)]]
|
||||
|
||||
#target_compile_features(nuspell PUBLIC cxx_std_17)
|
||||
|
||||
#[[target_include_directories(nuspell
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
|
||||
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
|
||||
target_link_libraries(nuspell PUBLIC ICU::uc ICU::data)
|
||||
|
||||
add_executable(nuspell-bin main.cxx)
|
||||
set_target_properties(nuspell-bin PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY ../tools
|
||||
RUNTIME_OUTPUT_NAME nuspell)
|
||||
target_compile_definitions(nuspell-bin PRIVATE
|
||||
PROJECT_VERSION=\"${PROJECT_VERSION}\")
|
||||
target_link_libraries(nuspell-bin nuspell)
|
||||
if (BUILD_SHARED_LIBS AND WIN32)
|
||||
# This should be PRE_LINK (or PRE_BUILD), so Vcpkg's POST_BUILD
|
||||
# step (see VCPKG_APPLOCAL_DEPS) that copies dll can pick up nuspell.dll
|
||||
# inside the folder ../tools.
|
||||
add_custom_command(TARGET nuspell-bin PRE_LINK
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
$<TARGET_FILE:nuspell> $<TARGET_FILE_DIR:nuspell-bin>)
|
||||
endif()
|
||||
|
||||
if (NOT subproject)
|
||||
install(TARGETS nuspell
|
||||
EXPORT NuspellTargets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/nuspell)
|
||||
install(EXPORT NuspellTargets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/nuspell
|
||||
NAMESPACE Nuspell::)
|
||||
install(TARGETS nuspell-bin DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
endif()]]
|
||||
|
||||
165
app/src/main/cpp/nuspell/LICENSE.txt
Normal file
165
app/src/main/cpp/nuspell/LICENSE.txt
Normal file
@@ -0,0 +1,165 @@
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
|
||||
This version of the GNU Lesser General Public License incorporates
|
||||
the terms and conditions of version 3 of the GNU General Public
|
||||
License, supplemented by the additional permissions listed below.
|
||||
|
||||
0. Additional Definitions.
|
||||
|
||||
As used herein, "this License" refers to version 3 of the GNU Lesser
|
||||
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
||||
General Public License.
|
||||
|
||||
"The Library" refers to a covered work governed by this License,
|
||||
other than an Application or a Combined Work as defined below.
|
||||
|
||||
An "Application" is any work that makes use of an interface provided
|
||||
by the Library, but which is not otherwise based on the Library.
|
||||
Defining a subclass of a class defined by the Library is deemed a mode
|
||||
of using an interface provided by the Library.
|
||||
|
||||
A "Combined Work" is a work produced by combining or linking an
|
||||
Application with the Library. The particular version of the Library
|
||||
with which the Combined Work was made is also called the "Linked
|
||||
Version".
|
||||
|
||||
The "Minimal Corresponding Source" for a Combined Work means the
|
||||
Corresponding Source for the Combined Work, excluding any source code
|
||||
for portions of the Combined Work that, considered in isolation, are
|
||||
based on the Application, and not on the Linked Version.
|
||||
|
||||
The "Corresponding Application Code" for a Combined Work means the
|
||||
object code and/or source code for the Application, including any data
|
||||
and utility programs needed for reproducing the Combined Work from the
|
||||
Application, but excluding the System Libraries of the Combined Work.
|
||||
|
||||
1. Exception to Section 3 of the GNU GPL.
|
||||
|
||||
You may convey a covered work under sections 3 and 4 of this License
|
||||
without being bound by section 3 of the GNU GPL.
|
||||
|
||||
2. Conveying Modified Versions.
|
||||
|
||||
If you modify a copy of the Library, and, in your modifications, a
|
||||
facility refers to a function or data to be supplied by an Application
|
||||
that uses the facility (other than as an argument passed when the
|
||||
facility is invoked), then you may convey a copy of the modified
|
||||
version:
|
||||
|
||||
a) under this License, provided that you make a good faith effort to
|
||||
ensure that, in the event an Application does not supply the
|
||||
function or data, the facility still operates, and performs
|
||||
whatever part of its purpose remains meaningful, or
|
||||
|
||||
b) under the GNU GPL, with none of the additional permissions of
|
||||
this License applicable to that copy.
|
||||
|
||||
3. Object Code Incorporating Material from Library Header Files.
|
||||
|
||||
The object code form of an Application may incorporate material from
|
||||
a header file that is part of the Library. You may convey such object
|
||||
code under terms of your choice, provided that, if the incorporated
|
||||
material is not limited to numerical parameters, data structure
|
||||
layouts and accessors, or small macros, inline functions and templates
|
||||
(ten or fewer lines in length), you do both of the following:
|
||||
|
||||
a) Give prominent notice with each copy of the object code that the
|
||||
Library is used in it and that the Library and its use are
|
||||
covered by this License.
|
||||
|
||||
b) Accompany the object code with a copy of the GNU GPL and this license
|
||||
document.
|
||||
|
||||
4. Combined Works.
|
||||
|
||||
You may convey a Combined Work under terms of your choice that,
|
||||
taken together, effectively do not restrict modification of the
|
||||
portions of the Library contained in the Combined Work and reverse
|
||||
engineering for debugging such modifications, if you also do each of
|
||||
the following:
|
||||
|
||||
a) Give prominent notice with each copy of the Combined Work that
|
||||
the Library is used in it and that the Library and its use are
|
||||
covered by this License.
|
||||
|
||||
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
||||
document.
|
||||
|
||||
c) For a Combined Work that displays copyright notices during
|
||||
execution, include the copyright notice for the Library among
|
||||
these notices, as well as a reference directing the user to the
|
||||
copies of the GNU GPL and this license document.
|
||||
|
||||
d) Do one of the following:
|
||||
|
||||
0) Convey the Minimal Corresponding Source under the terms of this
|
||||
License, and the Corresponding Application Code in a form
|
||||
suitable for, and under terms that permit, the user to
|
||||
recombine or relink the Application with a modified version of
|
||||
the Linked Version to produce a modified Combined Work, in the
|
||||
manner specified by section 6 of the GNU GPL for conveying
|
||||
Corresponding Source.
|
||||
|
||||
1) Use a suitable shared library mechanism for linking with the
|
||||
Library. A suitable mechanism is one that (a) uses at run time
|
||||
a copy of the Library already present on the user's computer
|
||||
system, and (b) will operate properly with a modified version
|
||||
of the Library that is interface-compatible with the Linked
|
||||
Version.
|
||||
|
||||
e) Provide Installation Information, but only if you would otherwise
|
||||
be required to provide such information under section 6 of the
|
||||
GNU GPL, and only to the extent that such information is
|
||||
necessary to install and execute a modified version of the
|
||||
Combined Work produced by recombining or relinking the
|
||||
Application with a modified version of the Linked Version. (If
|
||||
you use option 4d0, the Installation Information must accompany
|
||||
the Minimal Corresponding Source and Corresponding Application
|
||||
Code. If you use option 4d1, you must provide the Installation
|
||||
Information in the manner specified by section 6 of the GNU GPL
|
||||
for conveying Corresponding Source.)
|
||||
|
||||
5. Combined Libraries.
|
||||
|
||||
You may place library facilities that are a work based on the
|
||||
Library side by side in a single library together with other library
|
||||
facilities that are not Applications and are not covered by this
|
||||
License, and convey such a combined library under terms of your
|
||||
choice, if you do both of the following:
|
||||
|
||||
a) Accompany the combined library with a copy of the same work based
|
||||
on the Library, uncombined with any other library facilities,
|
||||
conveyed under the terms of this License.
|
||||
|
||||
b) Give prominent notice with the combined library that part of it
|
||||
is a work based on the Library, and explaining where to find the
|
||||
accompanying uncombined form of the same work.
|
||||
|
||||
6. Revised Versions of the GNU Lesser General Public License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions
|
||||
of the GNU Lesser General Public License from time to time. Such new
|
||||
versions will be similar in spirit to the present version, but may
|
||||
differ in detail to address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Library as you received it specifies that a certain numbered version
|
||||
of the GNU Lesser General Public License "or any later version"
|
||||
applies to it, you have the option of following the terms and
|
||||
conditions either of that published version or of any later version
|
||||
published by the Free Software Foundation. If the Library as you
|
||||
received it does not specify a version number of the GNU Lesser
|
||||
General Public License, you may choose any version of the GNU Lesser
|
||||
General Public License ever published by the Free Software Foundation.
|
||||
|
||||
If the Library as you received it specifies that a proxy can decide
|
||||
whether future versions of the GNU Lesser General Public License shall
|
||||
apply, that proxy's public statement of acceptance of any version is
|
||||
permanent authorization for you to choose that version for the
|
||||
Library.
|
||||
@@ -1,574 +0,0 @@
|
||||
/* Copyright 2016-2021 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Nuspell.
|
||||
*
|
||||
* Nuspell is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Nuspell is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "finder.hxx"
|
||||
#include "utils.hxx"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#if !defined(_WIN32) && \
|
||||
(defined(__unix__) || defined(__unix) || \
|
||||
(defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__))
|
||||
#include <unistd.h>
|
||||
#ifdef _POSIX_VERSION
|
||||
#include <dirent.h>
|
||||
#include "glob_ndk/glob_ndk.h"
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#elif defined(_WIN32)
|
||||
|
||||
#include <io.h>
|
||||
#include <windows.h>
|
||||
|
||||
#ifdef __MINGW32__
|
||||
#include <dirent.h>
|
||||
//#include <glob.h> //not present in mingw-w64. present in vanilla mingw
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#endif //__MINGW32__
|
||||
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace nuspell {
|
||||
inline namespace v5 {
|
||||
// Platform-specific separator characters.
// PATHSEP splits a list of paths stored in one environment variable
// (e.g. DICPATH); DIRSEP joins the components of a single path.
#ifdef _WIN32
constexpr char PATHSEP = ';';
constexpr char DIRSEP = '\\';
#else
constexpr char PATHSEP = ':';
constexpr char DIRSEP = '/';
#endif
|
||||
|
||||
/**
|
||||
* @brief Append the paths of the default directories to be searched for
|
||||
* dictionaries.
|
||||
* @param paths vector of directory paths to append to
|
||||
*/
|
||||
auto append_default_dir_paths(std::vector<string>& paths) -> void
|
||||
{
|
||||
auto dicpath = getenv("DICPATH");
|
||||
if (dicpath && *dicpath)
|
||||
split(dicpath, PATHSEP, paths);
|
||||
|
||||
#ifdef _POSIX_VERSION
|
||||
auto home = getenv("HOME");
|
||||
auto xdg_data_home = getenv("XDG_DATA_HOME");
|
||||
if (xdg_data_home && *xdg_data_home)
|
||||
paths.push_back(xdg_data_home + string("/hunspell"));
|
||||
else if (home)
|
||||
paths.push_back(home + string("/.local/share/hunspell"));
|
||||
|
||||
auto xdg_data_dirs = getenv("XDG_DATA_DIRS");
|
||||
if (xdg_data_dirs && *xdg_data_dirs) {
|
||||
auto data_dirs = string_view(xdg_data_dirs);
|
||||
|
||||
auto i = paths.size();
|
||||
split(data_dirs, PATHSEP, paths);
|
||||
for (; i != paths.size(); ++i)
|
||||
paths[i] += "/hunspell";
|
||||
|
||||
i = paths.size();
|
||||
split(data_dirs, PATHSEP, paths);
|
||||
for (; i != paths.size(); ++i)
|
||||
paths[i] += "/myspell";
|
||||
}
|
||||
else {
|
||||
paths.push_back("/usr/local/share/hunspell");
|
||||
paths.push_back("/usr/share/hunspell");
|
||||
paths.push_back("/usr/local/share/myspell");
|
||||
paths.push_back("/usr/share/myspell");
|
||||
}
|
||||
#if defined(__APPLE__) && defined(__MACH__)
|
||||
auto osx = string("/Library/Spelling");
|
||||
if (home) {
|
||||
paths.push_back(home + osx);
|
||||
}
|
||||
paths.push_back(osx);
|
||||
#endif
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
auto winpaths = {getenv("LOCALAPPDATA"), getenv("PROGRAMDATA")};
|
||||
for (auto& p : winpaths) {
|
||||
if (p) {
|
||||
paths.push_back(string(p) + "\\hunspell");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
class FileListerWindows {
|
||||
struct _finddata_t data = {};
|
||||
intptr_t handle = -1;
|
||||
bool goodbit = false;
|
||||
|
||||
public:
|
||||
FileListerWindows() {}
|
||||
FileListerWindows(const char* pattern) { first(pattern); }
|
||||
FileListerWindows(const string& pattern) { first(pattern); }
|
||||
FileListerWindows(const FileListerWindows& d) = delete;
|
||||
void operator=(const FileListerWindows& d) = delete;
|
||||
~FileListerWindows() { close(); }
|
||||
|
||||
auto first(const char* pattern) -> bool
|
||||
{
|
||||
close();
|
||||
handle = _findfirst(pattern, &data);
|
||||
goodbit = handle != -1;
|
||||
return goodbit;
|
||||
}
|
||||
auto first(const string& pattern) -> bool
|
||||
{
|
||||
return first(pattern.c_str());
|
||||
}
|
||||
|
||||
auto name() const -> const char* { return data.name; }
|
||||
auto good() const -> bool { return goodbit; }
|
||||
auto next() -> bool
|
||||
{
|
||||
goodbit = _findnext(handle, &data) == 0;
|
||||
return goodbit;
|
||||
}
|
||||
auto close() -> void
|
||||
{
|
||||
if (handle == -1)
|
||||
return;
|
||||
_findclose(handle);
|
||||
handle = -1;
|
||||
goodbit = false;
|
||||
}
|
||||
auto list_all() -> vector<string>
|
||||
{
|
||||
vector<string> ret;
|
||||
for (; good(); next()) {
|
||||
ret.push_back(name());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef _POSIX_VERSION
|
||||
/**
 * Owns the result of a glob() call and frees it on destruction.
 *
 * The constructors run the pattern immediately; glob() can be called again
 * to reuse the object for another pattern. begin()/end() expose the matched
 * path strings stored in the glob_t result.
 */
class Globber {
    glob_t matches = {};
    int status = 1; // result of the last ::glob call; 0 means success

  public:
    Globber(const char* pattern)
    {
        status = ::glob(pattern, 0, nullptr, &matches);
    }
    Globber(const std::string& pattern) : Globber(pattern.c_str()) {}
    Globber(const Globber&) = delete;
    Globber& operator=(const Globber&) = delete;
    ~Globber() { globfree(&matches); }

    /**
     * Discard the previous result and match @p pattern.
     * @return true if ::glob reported success.
     */
    bool glob(const char* pattern)
    {
        globfree(&matches);
        status = ::glob(pattern, 0, nullptr, &matches);
        return status == 0;
    }
    bool glob(const std::string& pattern) { return glob(pattern.c_str()); }

    const char* const* begin() { return matches.gl_pathv; }
    const char* const* end() { return matches.gl_pathv + matches.gl_pathc; }

    /** Append all matched paths to @p out; does nothing if the last glob failed. */
    void append_glob_paths_to(std::vector<std::string>& out)
    {
        if (status != 0)
            return;
        out.insert(out.end(), begin(), end());
    }
};
|
||||
#elif defined(_WIN32)
|
||||
class Globber {
|
||||
vector<string> data;
|
||||
|
||||
public:
|
||||
Globber(const char* pattern) { glob(pattern); }
|
||||
Globber(const string& pattern) { glob(pattern); }
|
||||
auto glob(const char* pattern) -> bool { return glob(string(pattern)); }
|
||||
auto glob(const string& pattern) -> bool
|
||||
{
|
||||
data.clear();
|
||||
|
||||
if (pattern.empty())
|
||||
return false;
|
||||
auto first_two = pattern.substr(0, 2);
|
||||
if (first_two == "\\\\" || first_two == "//" ||
|
||||
first_two == "\\/" || first_two == "//")
|
||||
return false;
|
||||
|
||||
auto q1 = vector<string>();
|
||||
auto q2 = q1;
|
||||
auto v = q1;
|
||||
|
||||
split_on_any_of(pattern, "\\/", v);
|
||||
auto i = v.begin();
|
||||
if (i == v.end())
|
||||
return false;
|
||||
|
||||
FileListerWindows fl;
|
||||
|
||||
if (i->find(':') != i->npos) {
|
||||
// absolute path
|
||||
q1.push_back(*i++);
|
||||
}
|
||||
else if (pattern[0] == '\\' || pattern[0] == '/') {
|
||||
// relative to drive
|
||||
q1.push_back("");
|
||||
}
|
||||
else {
|
||||
// relative
|
||||
q1.push_back(".");
|
||||
}
|
||||
for (; i != v.end(); ++i) {
|
||||
if (i->empty())
|
||||
continue;
|
||||
for (auto& q1e : q1) {
|
||||
auto p = q1e + DIRSEP + *i;
|
||||
// cout << "P " << p << endl;
|
||||
fl.first(p.c_str());
|
||||
for (; fl.good(); fl.next()) {
|
||||
|
||||
if (fl.name() == string(".") ||
|
||||
fl.name() == string(".."))
|
||||
continue;
|
||||
auto n = q1e + DIRSEP + fl.name();
|
||||
q2.push_back(n);
|
||||
// cout << "Q2 " << n << endl;
|
||||
}
|
||||
}
|
||||
q1.clear();
|
||||
q1.swap(q2);
|
||||
}
|
||||
data.insert(data.end(), q1.begin(), q1.end());
|
||||
return true;
|
||||
}
|
||||
auto begin() -> vector<string>::iterator { return data.begin(); }
|
||||
auto end() -> vector<string>::iterator { return data.end(); }
|
||||
auto append_glob_paths_to(vector<string>& out) -> void
|
||||
{
|
||||
out.insert(out.end(), begin(), end());
|
||||
}
|
||||
};
|
||||
#else
|
||||
// unimplemented
|
||||
// Placeholder with the same interface as the real Globber classes, selected
// when neither _POSIX_VERSION nor _WIN32 is defined. It never matches
// anything.
struct Globber {
    Globber(const char* /*pattern*/) {}
    Globber(const std::string& /*pattern*/) {}

    // Matching is not available: always reports failure.
    bool glob(const char* /*pattern*/) { return false; }
    bool glob(const std::string& /*pattern*/) { return false; }

    // Empty range.
    char** begin() { return nullptr; }
    char** end() { return nullptr; }

    // Nothing to append.
    void append_glob_paths_to(std::vector<std::string>& /*out*/) {}
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Append the paths of the LibreOffice's directories to be searched for
|
||||
* dictionaries.
|
||||
*
|
||||
* @warning This function shall not be called from LibreOffice or modules that
|
||||
* may end up being used by LibreOffice. It is mainly intended to be used by
|
||||
* the CLI tool.
|
||||
*
|
||||
* @param paths vector of directory paths to append to
|
||||
*/
|
||||
auto append_libreoffice_dir_paths(std::vector<std::string>& paths) -> void
|
||||
{
|
||||
auto lo_user_glob = string();
|
||||
#ifdef _POSIX_VERSION
|
||||
// add LibreOffice Linux global paths
|
||||
auto prefixes = {"/usr/local/lib/libreoffice", "/usr/lib/libreoffice",
|
||||
"/opt/libreoffice*"};
|
||||
for (auto& prefix : prefixes) {
|
||||
Globber g(string(prefix) + "/share/extensions/dict-*");
|
||||
g.append_glob_paths_to(paths);
|
||||
}
|
||||
|
||||
// add LibreOffice Linux local
|
||||
|
||||
auto home = getenv("HOME");
|
||||
if (home == nullptr)
|
||||
return;
|
||||
lo_user_glob = home;
|
||||
lo_user_glob += "/.config/libreoffice/?/user/uno_packages/cache"
|
||||
"/uno_packages/*/*.oxt/";
|
||||
#elif defined(_WIN32)
|
||||
// add Libreoffice Windows global paths
|
||||
auto prefixes = {getenv("PROGRAMFILES"), getenv("PROGRAMFILES(x86)")};
|
||||
for (auto& prefix : prefixes) {
|
||||
if (prefix == nullptr)
|
||||
continue;
|
||||
Globber g(string(prefix) +
|
||||
"\\LibreOffice ?\\share\\extensions\\dict-*");
|
||||
g.append_glob_paths_to(paths);
|
||||
}
|
||||
|
||||
auto home = getenv("APPDATA");
|
||||
if (home == nullptr)
|
||||
return;
|
||||
lo_user_glob = home;
|
||||
lo_user_glob += "\\libreoffice\\?\\user\\uno_packages\\cache"
|
||||
"\\uno_packages\\*\\*.oxt\\";
|
||||
#else
|
||||
return;
|
||||
#endif
|
||||
// finish adding LibreOffice user path dicts (Linux and Windows)
|
||||
Globber g(lo_user_glob + "dict*");
|
||||
g.append_glob_paths_to(paths);
|
||||
|
||||
g.glob(lo_user_glob + "*.aff");
|
||||
auto path_str = string();
|
||||
for (auto& path : g) {
|
||||
path_str = path;
|
||||
path_str.erase(path_str.rfind(DIRSEP));
|
||||
paths.push_back(path_str);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_POSIX_VERSION) || defined(__MINGW32__)
|
||||
/**
 * Minimal directory iterator built on opendir / readdir / closedir.
 *
 * Usage: open() a directory, then call next() until it returns false,
 * reading entry_name() after each successful next(). The directory stream is
 * closed by close() and by the destructor.
 */
class Directory {
    DIR* dp = nullptr;              // open directory stream, or nullptr
    struct dirent* ent_p = nullptr; // entry returned by the last readdir()

  public:
    Directory() = default;
    Directory(const Directory& d) = delete;
    void operator=(const Directory& d) = delete;

    /**
     * Open @p dirname, closing any directory opened earlier.
     * @return true if the directory could be opened.
     */
    auto open(const std::string& dirname) -> bool
    {
        close();
        dp = opendir(dirname.c_str());
        return dp != nullptr;
    }

    /**
     * Advance to the next entry.
     * @return true if an entry is available; false at the end of the
     * directory, on a read error, or when no directory is open.
     */
    auto next() -> bool
    {
        // readdir() must not be called without an open stream.
        if (dp == nullptr) {
            ent_p = nullptr;
            return false;
        }
        ent_p = readdir(dp);
        return ent_p != nullptr;
    }

    /** Name of the current entry; valid only after next() returned true. */
    auto entry_name() const -> const char* { return ent_p->d_name; }

    /** Close the directory stream, if open. Safe to call repeatedly. */
    auto close() -> void
    {
        if (dp) {
            (void)closedir(dp);
            dp = nullptr;
        }
        // the entry belonged to the stream that was just closed
        ent_p = nullptr;
    }
    ~Directory() { close(); }
};
|
||||
#elif defined(_WIN32)
|
||||
class Directory {
|
||||
FileListerWindows fl;
|
||||
bool first = true;
|
||||
|
||||
public:
|
||||
Directory() {}
|
||||
Directory(const Directory& d) = delete;
|
||||
void operator=(const Directory& d) = delete;
|
||||
auto open(const string& dirname) -> bool
|
||||
{
|
||||
fl.first(dirname + "\\*");
|
||||
first = true;
|
||||
return fl.good();
|
||||
}
|
||||
auto next() -> bool
|
||||
{
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
fl.next();
|
||||
return fl.good();
|
||||
}
|
||||
auto entry_name() const -> const char* { return fl.name(); }
|
||||
auto close() -> void { fl.close(); }
|
||||
};
|
||||
#else
|
||||
// Placeholder with the same interface as the real Directory classes, used
// when no directory-listing implementation is available. It never opens
// anything and never yields an entry.
struct Directory {
    Directory() {}
    Directory(const Directory& d) = delete;
    void operator=(const Directory& d) = delete;

    bool open(const std::string& /*dirname*/) { return false; }
    bool next() { return false; }
    const char* entry_name() const { return nullptr; }
    void close() {}
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Search a directory for dictionaries.
|
||||
*
|
||||
* This function searches the directory for files that represent a dictionary
|
||||
* and for each one found it appends the pair of dictionary name and filepath to
|
||||
* dictionary, both without the filename extension (.aff or .dic).
|
||||
*
|
||||
* For example for the files /dict/dir/en_US.dic and /dict/dir/en_US.aff the
|
||||
* following pair will be appended ("en_US", "/dict/dir/en_US").
|
||||
*
|
||||
* @todo At some point this API should be made to be more strongly typed.
|
||||
* Instead of using that pair of strings to represent the dictionary files, a
|
||||
* new class should be created with three public functions, getters, that would
|
||||
* return the name, the path to the .aff file (with filename extension to avoid
|
||||
* confusions) and the path to the .dic file. The C++ 17 std::filesystem::path
|
||||
* should probably be used. It is unspecified to the public what this class
|
||||
* holds privately, but it should probably hold only one path to the aff file.
|
||||
* For the directory paths, it is simple, just use the type
|
||||
* std::filesystem::path. When this API is created, the same function names
|
||||
* should be used, added as overloads. The old API should be marked as
|
||||
* deprecated. This should be done when we start requiring GCC 9 which supports
|
||||
* C++ 17 filesystem out of the box. GCC 8 has this too, but it is somewhat
|
||||
* experimental and requires manually linking to additional static library.
|
||||
*
|
||||
* @param dir_path path to directory
|
||||
* @param dict_list vector to append the found dictionaries to
|
||||
*/
|
||||
auto search_dir_for_dicts(const string& dir_path,
|
||||
vector<pair<string, string>>& dict_list) -> void
|
||||
{
|
||||
Directory d;
|
||||
if (d.open(dir_path) == false)
|
||||
return;
|
||||
|
||||
unordered_set<string> dics;
|
||||
string file_name;
|
||||
while (d.next()) {
|
||||
file_name = d.entry_name();
|
||||
auto sz = file_name.size();
|
||||
if (sz < 4)
|
||||
continue;
|
||||
|
||||
if (file_name.compare(sz - 4, 4, ".dic") == 0) {
|
||||
dics.insert(file_name);
|
||||
file_name.replace(sz - 4, 4, ".aff");
|
||||
}
|
||||
else if (file_name.compare(sz - 4, 4, ".aff") == 0) {
|
||||
dics.insert(file_name);
|
||||
file_name.replace(sz - 4, 4, ".dic");
|
||||
}
|
||||
else {
|
||||
continue;
|
||||
}
|
||||
if (dics.count(file_name)) {
|
||||
file_name.erase(sz - 4);
|
||||
auto full_path = dir_path + DIRSEP + file_name;
|
||||
dict_list.emplace_back(move(file_name),
|
||||
move(full_path));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Search the directories for dictionaries.
|
||||
*
|
||||
* @see search_dir_for_dicts()
|
||||
*
|
||||
* @param dir_paths list of paths to directories
|
||||
* @param dict_list vector to append the found dictionaries to
|
||||
*/
|
||||
auto search_dirs_for_dicts(const std::vector<string>& dir_paths,
|
||||
std::vector<std::pair<string, string>>& dict_list)
|
||||
-> void
|
||||
{
|
||||
for (auto& p : dir_paths)
|
||||
search_dir_for_dicts(p, dict_list);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Search the default directories for dictionaries.
|
||||
*
|
||||
* @see append_default_dir_paths()
|
||||
* @see search_dirs_for_dicts()
|
||||
*
|
||||
* @param dict_list vector to append the found dictionaries to
|
||||
*/
|
||||
auto search_default_dirs_for_dicts(
|
||||
std::vector<std::pair<std::string, std::string>>& dict_list) -> void
|
||||
{
|
||||
auto dir_paths = vector<string>();
|
||||
append_default_dir_paths(dir_paths);
|
||||
search_dirs_for_dicts(dir_paths, dict_list);
|
||||
}
|
||||
|
||||
/**
 * @brief Find a dictionary by name.
 *
 * Scans @p dict_list from the front and stops at the first element whose
 * name (the first member of the pair) equals @p dict_name.
 *
 * @param dict_list vector of pairs with name and paths
 * @param dict_name dictionary name
 * @return iterator into @p dict_list pointing to the found dictionary, or
 * the end iterator if there is no match.
 */
auto find_dictionary(
    const std::vector<std::pair<std::string, std::string>>& dict_list,
    const std::string& dict_name)
    -> std::vector<std::pair<std::string, std::string>>::const_iterator
{
    auto it = dict_list.begin();
    while (it != dict_list.end() && it->first != dict_name)
        ++it;
    return it;
}
|
||||
|
||||
// Collects the search directories (default ones, then LibreOffice ones, then
// the current directory), searches them for dictionaries and orders the
// result by dictionary name. The sort is stable, so dictionaries with the
// same name stay in the order in which their directories were searched.
Dict_Finder_For_CLI_Tool::Dict_Finder_For_CLI_Tool()
{
    append_default_dir_paths(dir_paths);
    append_libreoffice_dir_paths(dir_paths);
    dir_paths.emplace_back(".");

    search_dirs_for_dicts(dir_paths, dict_multimap);

    const auto by_name = [](auto& lhs, auto& rhs) {
        return lhs.first < rhs.first;
    };
    std::stable_sort(dict_multimap.begin(), dict_multimap.end(), by_name);
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @brief Gets the dictionary path.
|
||||
*
|
||||
* If path is given (contains slash) it returns the input argument,
|
||||
* otherwise searches the found dictionaries by their name and returns their
|
||||
* path.
|
||||
*
|
||||
* @param dict name or path of dictionary without the trailing .aff/.dic.
|
||||
* @return the path to dictionary or empty if does not exists.
|
||||
*/
|
||||
auto Dict_Finder_For_CLI_Tool::get_dictionary_path(
|
||||
const std::string& dict) const -> std::string
|
||||
{
|
||||
#ifdef _WIN32
|
||||
const auto SEPARATORS = "\\/";
|
||||
#else
|
||||
const auto SEPARATORS = '/';
|
||||
#endif
|
||||
// first check if it is a path
|
||||
if (dict.find_first_of(SEPARATORS) != dict.npos) {
|
||||
// a path
|
||||
return dict;
|
||||
}
|
||||
else {
|
||||
// search list
|
||||
auto x = find_dictionary(dict_multimap, dict);
|
||||
if (x != end(dict_multimap))
|
||||
return x->second;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
} // namespace v5
|
||||
} // namespace nuspell
|
||||
@@ -1,84 +0,0 @@
|
||||
/* Copyright 2016-2021 Dimitrij Mijoski
|
||||
*
|
||||
* This file is part of Nuspell.
|
||||
*
|
||||
* Nuspell is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Nuspell is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief Finding dictionaries.
|
||||
*/
|
||||
|
||||
#ifndef NUSPELL_FINDER_HXX
|
||||
#define NUSPELL_FINDER_HXX
|
||||
|
||||
#include "nuspell_export.h"
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define NUSPELL_MSVC_PRAGMA_WARNING(x) __pragma(warning(x))
|
||||
#else
|
||||
#define NUSPELL_MSVC_PRAGMA_WARNING(x)
|
||||
#endif
|
||||
NUSPELL_MSVC_PRAGMA_WARNING(push)
|
||||
NUSPELL_MSVC_PRAGMA_WARNING(disable : 4251)
|
||||
|
||||
namespace nuspell {
|
||||
inline namespace v5 {
|
||||
|
||||
/** Append the default dictionary search directories to @p paths. */
NUSPELL_EXPORT auto
append_default_dir_paths(std::vector<std::string>& paths) -> void;

/** Append LibreOffice's dictionary directories to @p paths. */
NUSPELL_EXPORT auto
append_libreoffice_dir_paths(std::vector<std::string>& paths) -> void;

/**
 * Search the directory @p dir_path for dictionaries and append each one
 * found to @p dict_list as a (name, path) pair.
 */
NUSPELL_EXPORT auto
search_dir_for_dicts(const std::string& dir_path,
                     std::vector<std::pair<std::string, std::string>>& dict_list)
    -> void;

/** Search every directory in @p dir_paths for dictionaries. */
NUSPELL_EXPORT auto
search_dirs_for_dicts(const std::vector<std::string>& dir_paths,
                      std::vector<std::pair<std::string, std::string>>& dict_list)
    -> void;

/** Search the default directories for dictionaries. */
NUSPELL_EXPORT auto search_default_dirs_for_dicts(
    std::vector<std::pair<std::string, std::string>>& dict_list) -> void;

/**
 * Find the first element of @p dict_list whose name equals @p dict_name.
 * Returns the end iterator if there is none.
 */
NUSPELL_EXPORT auto
find_dictionary(const std::vector<std::pair<std::string, std::string>>& dict_list,
                const std::string& dict_name)
    -> std::vector<std::pair<std::string, std::string>>::const_iterator;
|
||||
|
||||
/**
|
||||
* @brief Don't use this except from Nuspell CLI tool.
|
||||
*
|
||||
* There are no promises of the API.
|
||||
*/
|
||||
class NUSPELL_EXPORT Dict_Finder_For_CLI_Tool {
|
||||
std::vector<std::string> dir_paths;
|
||||
std::vector<std::pair<std::string, std::string>> dict_multimap;
|
||||
|
||||
public:
|
||||
Dict_Finder_For_CLI_Tool();
|
||||
auto& get_dir_paths() const { return dir_paths; }
|
||||
auto& get_dictionaries() const { return dict_multimap; }
|
||||
auto get_dictionary_path(const std::string& dict) const -> std::string;
|
||||
};
|
||||
} // namespace v5
|
||||
} // namespace nuspell
|
||||
NUSPELL_MSVC_PRAGMA_WARNING(pop)
|
||||
#endif // NUSPELL_FINDER_HXX
|
||||
@@ -1,718 +0,0 @@
|
||||
/* Copyright 2016-2021 Dimitrij Mijoski, Sander van Geloven
|
||||
*
|
||||
* This file is part of Nuspell.
|
||||
*
|
||||
* Nuspell is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Nuspell is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "dictionary.hxx"
|
||||
#include "finder.hxx"
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <unicode/brkiter.h>
|
||||
#include <unicode/ucnv.h>
|
||||
|
||||
#if defined(__MINGW32__) || defined(__unix__) || defined(__unix) || \
|
||||
(defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__)
|
||||
#include <getopt.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef _POSIX_VERSION
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#define NOMINMAX
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// manually define if not supplied by the build system
|
||||
#ifndef PROJECT_VERSION
|
||||
#define PROJECT_VERSION "unknown.version"
|
||||
#endif
|
||||
#define PACKAGE_STRING "nuspell " PROJECT_VERSION
|
||||
|
||||
using namespace std;
|
||||
using namespace nuspell;
|
||||
|
||||
/**
 * @brief Operating modes of the command-line tool.
 *
 * The enumerators keep their original order, so their implicit numeric
 * values are unchanged.
 */
enum Mode {
    /** printing correct and misspelled words with suggestions */
    DEFAULT_MODE,
    /** printing only misspelled words */
    MISSPELLED_WORDS_MODE,
    /** printing only lines with misspelled word(s) */
    MISSPELLED_LINES_MODE,
    /** printing only correct words */
    CORRECT_WORDS_MODE,
    /** printing only fully correct lines */
    CORRECT_LINES_MODE,
    /** intermediate mode used while parsing command line arguments,
        otherwise unused */
    LINES_MODE,
    /** printing available dictionaries */
    LIST_DICTIONARIES_MODE,
    /** printing help information */
    HELP_MODE,
    /** printing version information */
    VERSION_MODE,
    /** invalid (combination of) arguments; main() reports it and exits */
    ERROR_MODE
};
|
||||
|
||||
struct Args_t {
|
||||
Mode mode = DEFAULT_MODE;
|
||||
bool whitespace_segmentation = false;
|
||||
string program_name = "nuspell";
|
||||
string dictionary;
|
||||
string encoding;
|
||||
vector<string> other_dicts;
|
||||
vector<string> files;
|
||||
|
||||
Args_t() = default;
|
||||
Args_t(int argc, char* argv[]) { parse_args(argc, argv); }
|
||||
auto parse_args(int argc, char* argv[]) -> void;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Parses command line arguments.
|
||||
*
|
||||
* @param argc command-line argument count.
|
||||
* @param argv command-line argument vector.
|
||||
*/
|
||||
auto Args_t::parse_args(int argc, char* argv[]) -> void
|
||||
{
|
||||
if (argc != 0 && argv[0] && argv[0][0] != '\0')
|
||||
program_name = argv[0];
|
||||
// See POSIX Utility argument syntax
|
||||
// http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap12.html
|
||||
#if defined(_POSIX_VERSION) || defined(__MINGW32__)
|
||||
int c;
|
||||
// The program can run in various modes depending on the
|
||||
// command line options. mode is FSM state, this while loop is FSM.
|
||||
const char* shortopts = ":d:i:aDGLslhv";
|
||||
const struct option longopts[] = {
|
||||
{"version", 0, nullptr, 'v'},
|
||||
{"help", 0, nullptr, 'h'},
|
||||
{nullptr, 0, nullptr, 0},
|
||||
};
|
||||
while ((c = getopt_long(argc, argv, shortopts, longopts, nullptr)) !=
|
||||
-1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
// ispell pipe mode, same as default mode
|
||||
if (mode != DEFAULT_MODE)
|
||||
mode = ERROR_MODE;
|
||||
break;
|
||||
case 'd':
|
||||
if (dictionary.empty())
|
||||
dictionary = optarg;
|
||||
else
|
||||
cerr << "WARNING: Detected not yet supported "
|
||||
"other dictionary "
|
||||
<< optarg << '\n';
|
||||
other_dicts.emplace_back(optarg);
|
||||
|
||||
break;
|
||||
case 'i':
|
||||
encoding = optarg;
|
||||
|
||||
break;
|
||||
case 'D':
|
||||
if (mode == DEFAULT_MODE)
|
||||
mode = LIST_DICTIONARIES_MODE;
|
||||
else
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
case 'G':
|
||||
if (mode == DEFAULT_MODE)
|
||||
mode = CORRECT_WORDS_MODE;
|
||||
else if (mode == LINES_MODE)
|
||||
mode = CORRECT_LINES_MODE;
|
||||
else
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
case 'l':
|
||||
if (mode == DEFAULT_MODE)
|
||||
mode = MISSPELLED_WORDS_MODE;
|
||||
else if (mode == LINES_MODE)
|
||||
mode = MISSPELLED_LINES_MODE;
|
||||
else
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
case 'L':
|
||||
if (mode == DEFAULT_MODE)
|
||||
mode = LINES_MODE;
|
||||
else if (mode == MISSPELLED_WORDS_MODE)
|
||||
mode = MISSPELLED_LINES_MODE;
|
||||
else if (mode == CORRECT_WORDS_MODE)
|
||||
mode = CORRECT_LINES_MODE;
|
||||
else
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
case 's':
|
||||
whitespace_segmentation = true;
|
||||
|
||||
break;
|
||||
case 'h':
|
||||
if (mode == DEFAULT_MODE)
|
||||
mode = HELP_MODE;
|
||||
else
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
case 'v':
|
||||
if (mode == DEFAULT_MODE)
|
||||
mode = VERSION_MODE;
|
||||
else
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
case ':':
|
||||
cerr << "Option -" << static_cast<char>(optopt)
|
||||
<< " requires an operand\n";
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
case '?':
|
||||
cerr << "Unrecognized option: '-"
|
||||
<< static_cast<char>(optopt) << "'\n";
|
||||
mode = ERROR_MODE;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
files.insert(files.end(), argv + optind, argv + argc);
|
||||
if (mode == LINES_MODE) {
|
||||
// in v1 this defaults to MISSPELLED_LINES_MODE
|
||||
// we will make it error here
|
||||
mode = ERROR_MODE;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Prints help information to standard output.
|
||||
*
|
||||
* @param program_name pass argv[0] here.
|
||||
*/
|
||||
auto print_help(const string& program_name) -> void
|
||||
{
|
||||
auto& p = program_name;
|
||||
auto& o = cout;
|
||||
o << "Usage:\n"
|
||||
"\n";
|
||||
o << p << " [-s] [-d dict_NAME] [-i enc] [file_name]...\n";
|
||||
o << p << " -l|-G [-L] [-s] [-d dict_NAME] [-i enc] [file_name]...\n";
|
||||
o << p << " -D|-h|--help|-v|--version\n";
|
||||
o << "\n"
|
||||
"Check spelling of each FILE. Without FILE, check standard "
|
||||
"input.\n"
|
||||
"\n"
|
||||
" -d di_CT use di_CT dictionary. Only one dictionary at a\n"
|
||||
" time is currently supported\n"
|
||||
" -D print search paths and available dictionaries\n"
|
||||
" and exit\n"
|
||||
" -i enc input/output encoding, default is active locale\n"
|
||||
" -l print only misspelled words or lines\n"
|
||||
" -G print only correct words or lines\n"
|
||||
" -L lines mode\n"
|
||||
" -s use simple whitespace text segmentation to\n"
|
||||
" extract words instead of the default Unicode\n"
|
||||
" text segmentation. It is not recommended to use\n"
|
||||
" this.\n"
|
||||
" -h, --help print this help and exit\n"
|
||||
" -v, --version print version number and exit\n"
|
||||
"\n";
|
||||
o << "Example: " << p << " -d en_US file.txt\n";
|
||||
o << "\n"
|
||||
"Bug reports: <https://github.com/nuspell/nuspell/issues>\n"
|
||||
"Full documentation: "
|
||||
"<https://github.com/nuspell/nuspell/wiki>\n"
|
||||
"Home page: <http://nuspell.github.io/>\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Prints the version number to standard output.
|
||||
*/
|
||||
auto print_version() -> void
|
||||
{
|
||||
cout << PACKAGE_STRING
|
||||
"\n"
|
||||
"Copyright (C) 2016-2021 Dimitrij Mijoski and Sander van Geloven\n"
|
||||
"License LGPLv3+: GNU LGPL version 3 or later "
|
||||
"<http://gnu.org/licenses/lgpl.html>.\n"
|
||||
"This is free software: you are free to change and "
|
||||
"redistribute it.\n"
|
||||
"There is NO WARRANTY, to the extent permitted by law.\n"
|
||||
"\n"
|
||||
"Written by Dimitrij Mijoski and Sander van Geloven.\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Lists dictionary paths and available dictionaries.
|
||||
*
|
||||
* @param f a finder for search paths and located dictionary.
|
||||
*/
|
||||
auto list_dictionaries(const Dict_Finder_For_CLI_Tool& f) -> void
|
||||
{
|
||||
if (f.get_dir_paths().empty()) {
|
||||
cout << "No search paths available" << '\n';
|
||||
}
|
||||
else {
|
||||
cout << "Search paths:" << '\n';
|
||||
for (auto& p : f.get_dir_paths()) {
|
||||
cout << p << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
// Even if no search paths are available, still report on available
|
||||
// dictionaries.
|
||||
if (f.get_dictionaries().empty()) {
|
||||
cout << "No dictionaries available\n";
|
||||
}
|
||||
else {
|
||||
cout << "Available dictionaries:\n";
|
||||
for (auto& d : f.get_dictionaries()) {
|
||||
cout << left << setw(15) << d.first << ' ' << d.second
|
||||
<< '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto to_utf8(string_view source, string& dest, UConverter* ucnv,
|
||||
UErrorCode& uerr)
|
||||
{
|
||||
dest.resize(dest.capacity());
|
||||
auto len = ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(), dest.size(),
|
||||
source.data(), source.size(), &uerr);
|
||||
dest.resize(len);
|
||||
if (uerr == U_BUFFER_OVERFLOW_ERROR) {
|
||||
uerr = U_ZERO_ERROR;
|
||||
ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(), dest.size(),
|
||||
source.data(), source.size(), &uerr);
|
||||
}
|
||||
}
|
||||
|
||||
auto from_utf8(string_view source, string& dest, UConverter* ucnv,
|
||||
UErrorCode& uerr)
|
||||
{
|
||||
dest.resize(dest.capacity());
|
||||
auto len =
|
||||
ucnv_fromAlgorithmic(ucnv, UCNV_UTF8, dest.data(), dest.size(),
|
||||
source.data(), source.size(), &uerr);
|
||||
dest.resize(len);
|
||||
if (uerr == U_BUFFER_OVERFLOW_ERROR) {
|
||||
uerr = U_ZERO_ERROR;
|
||||
ucnv_fromAlgorithmic(ucnv, UCNV_UTF8, dest.data(), dest.size(),
|
||||
source.data(), source.size(), &uerr);
|
||||
}
|
||||
}
|
||||
|
||||
auto to_unicode_string(string_view source, icu::UnicodeString& dest,
|
||||
UConverter* ucnv, UErrorCode& uerr)
|
||||
{
|
||||
auto buf = dest.getBuffer(-1);
|
||||
auto len = ucnv_toUChars(ucnv, buf, dest.getCapacity(), source.data(),
|
||||
source.size(), &uerr);
|
||||
if (uerr == U_BUFFER_OVERFLOW_ERROR) {
|
||||
uerr = U_ZERO_ERROR;
|
||||
dest.releaseBuffer(0);
|
||||
buf = dest.getBuffer(len);
|
||||
if (!buf)
|
||||
throw bad_alloc();
|
||||
len = ucnv_toUChars(ucnv, buf, dest.getCapacity(),
|
||||
source.data(), source.size(), &uerr);
|
||||
}
|
||||
dest.releaseBuffer(len);
|
||||
}
|
||||
|
||||
auto process_word(Mode mode, const Dictionary& dic, string_view word,
|
||||
size_t pos_word, vector<string_view>& wrong_words,
|
||||
vector<string>& suggestions, ostream& out)
|
||||
{
|
||||
auto correct = dic.spell(word);
|
||||
switch (mode) {
|
||||
case DEFAULT_MODE: {
|
||||
if (correct) {
|
||||
out << "*\n";
|
||||
break;
|
||||
}
|
||||
dic.suggest(word, suggestions);
|
||||
if (suggestions.empty()) {
|
||||
out << "# " << word << ' ' << pos_word << '\n';
|
||||
break;
|
||||
}
|
||||
out << "& " << word << ' ' << suggestions.size() << ' '
|
||||
<< pos_word << ": ";
|
||||
out << suggestions[0];
|
||||
for_each(begin(suggestions) + 1, end(suggestions),
|
||||
[&](auto& sug) { out << ", " << sug; });
|
||||
out << '\n';
|
||||
break;
|
||||
}
|
||||
case MISSPELLED_WORDS_MODE:
|
||||
if (!correct)
|
||||
out << word << '\n';
|
||||
break;
|
||||
case CORRECT_WORDS_MODE:
|
||||
if (correct)
|
||||
out << word << '\n';
|
||||
break;
|
||||
case MISSPELLED_LINES_MODE:
|
||||
case CORRECT_LINES_MODE:
|
||||
if (!correct)
|
||||
wrong_words.push_back(word);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto process_word_other_encoding(Mode mode, const Dictionary& dic,
|
||||
string_view word, string_view u8word,
|
||||
size_t pos_word,
|
||||
vector<string_view>& wrong_words,
|
||||
vector<string>& suggestions, ostream& out,
|
||||
UConverter* ucnv, UErrorCode& uerr)
|
||||
{
|
||||
auto correct = dic.spell(u8word);
|
||||
switch (mode) {
|
||||
case DEFAULT_MODE: {
|
||||
if (correct) {
|
||||
out << "*\n";
|
||||
break;
|
||||
}
|
||||
dic.suggest(u8word, suggestions);
|
||||
if (suggestions.empty()) {
|
||||
out << "# " << word << ' ' << pos_word << '\n';
|
||||
break;
|
||||
}
|
||||
out << "& " << word << ' ' << suggestions.size() << ' '
|
||||
<< pos_word << ": ";
|
||||
auto sug_in_encoding = string();
|
||||
from_utf8(suggestions[0], sug_in_encoding, ucnv, uerr);
|
||||
out << sug_in_encoding;
|
||||
for_each(begin(suggestions) + 1, end(suggestions),
|
||||
[&](const string& u8sug) {
|
||||
out << ", ";
|
||||
from_utf8(u8sug, sug_in_encoding, ucnv, uerr);
|
||||
out << sug_in_encoding;
|
||||
});
|
||||
out << '\n';
|
||||
break;
|
||||
}
|
||||
case MISSPELLED_WORDS_MODE:
|
||||
if (!correct)
|
||||
out << word << '\n';
|
||||
break;
|
||||
case CORRECT_WORDS_MODE:
|
||||
if (correct)
|
||||
out << word << '\n';
|
||||
break;
|
||||
case MISSPELLED_LINES_MODE:
|
||||
case CORRECT_LINES_MODE:
|
||||
if (!correct)
|
||||
wrong_words.push_back(word);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto finish_line(Mode mode, const string& line,
|
||||
const vector<string_view>& wrong_words, ostream& out)
|
||||
{
|
||||
switch (mode) {
|
||||
case DEFAULT_MODE:
|
||||
out << '\n';
|
||||
break;
|
||||
case MISSPELLED_LINES_MODE:
|
||||
if (!wrong_words.empty())
|
||||
out << line << '\n';
|
||||
break;
|
||||
case CORRECT_LINES_MODE:
|
||||
if (wrong_words.empty())
|
||||
out << line << '\n';
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto whitespace_segmentation_loop(istream& in, ostream& out,
|
||||
const Dictionary& dic, Mode mode,
|
||||
UConverter* ucnv, UErrorCode& uerr)
|
||||
{
|
||||
auto line = string();
|
||||
auto suggestions = vector<string>();
|
||||
auto wrong_words = vector<string_view>();
|
||||
auto loc = in.getloc();
|
||||
auto& facet = use_facet<ctype<char>>(loc);
|
||||
auto isspace = [&](char c) { return facet.is(facet.space, c); };
|
||||
auto u8word = string();
|
||||
auto is_utf8 = ucnv_getType(ucnv) == UCNV_UTF8;
|
||||
|
||||
while (getline(in, line)) {
|
||||
wrong_words.clear();
|
||||
for (auto a = begin(line); a != end(line);) {
|
||||
a = find_if_not(a, end(line), isspace);
|
||||
if (a == end(line))
|
||||
break;
|
||||
auto b = find_if(a, end(line), isspace);
|
||||
auto word = string_view(&*a, distance(a, b));
|
||||
auto pos_word = distance(begin(line), a);
|
||||
if (is_utf8) {
|
||||
process_word(mode, dic, word, pos_word,
|
||||
wrong_words, suggestions, out);
|
||||
}
|
||||
else {
|
||||
to_utf8(word, u8word, ucnv, uerr);
|
||||
process_word_other_encoding(
|
||||
mode, dic, word, u8word, pos_word,
|
||||
wrong_words, suggestions, out, ucnv, uerr);
|
||||
}
|
||||
a = b;
|
||||
}
|
||||
finish_line(mode, line, wrong_words, out);
|
||||
}
|
||||
}
|
||||
|
||||
auto is_word_break(int32_t typ)
|
||||
{
|
||||
return (UBRK_WORD_NUMBER <= typ && typ < UBRK_WORD_NUMBER_LIMIT) ||
|
||||
(UBRK_WORD_LETTER <= typ && typ < UBRK_WORD_LETTER_LIMIT) ||
|
||||
(UBRK_WORD_KANA <= typ && typ < UBRK_WORD_KANA_LIMIT) ||
|
||||
(UBRK_WORD_IDEO <= typ && typ < UBRK_WORD_IDEO_LIMIT);
|
||||
}
|
||||
|
||||
auto segment_line_utf8(Mode mode, const Dictionary& dic, const string& line,
|
||||
UText* utext, icu::BreakIterator* ubrkiter,
|
||||
UErrorCode& uerr, vector<string>& suggestions,
|
||||
vector<string_view>& wrong_words, ostream& out)
|
||||
{
|
||||
utext_openUTF8(utext, line.data(), line.size(), &uerr);
|
||||
ubrkiter->setText(utext, uerr);
|
||||
for (auto i = ubrkiter->first(), prev = 0; i != ubrkiter->DONE;
|
||||
prev = i, i = ubrkiter->next()) {
|
||||
auto typ = ubrkiter->getRuleStatus();
|
||||
if (is_word_break(typ)) {
|
||||
auto word = string_view(line).substr(prev, i - prev);
|
||||
process_word(mode, dic, word, prev, wrong_words,
|
||||
suggestions, out);
|
||||
}
|
||||
}
|
||||
finish_line(mode, line, wrong_words, out);
|
||||
assert(U_SUCCESS(uerr));
|
||||
}
|
||||
|
||||
auto segment_line_generic(Mode mode, const Dictionary& dic, const string& line,
|
||||
icu::UnicodeString& uline, UConverter* ucnv,
|
||||
icu::BreakIterator* ubrkiter, UErrorCode& uerr,
|
||||
string& u8word, vector<string>& suggestions,
|
||||
vector<string_view>& wrong_words, ostream& out)
|
||||
{
|
||||
to_unicode_string(line, uline, ucnv, uerr);
|
||||
ubrkiter->setText(uline);
|
||||
size_t orig_prev = 0, orig_i = 0;
|
||||
auto src = line.c_str();
|
||||
auto src_end = src + line.size();
|
||||
|
||||
ucnv_resetToUnicode(ucnv);
|
||||
for (auto i = ubrkiter->first(), prev = 0; i != ubrkiter->DONE;
|
||||
prev = i, i = ubrkiter->next(), orig_prev = orig_i) {
|
||||
|
||||
for (auto j = prev; j != i; ++j) {
|
||||
auto cp = ucnv_getNextUChar(ucnv, &src, src_end, &uerr);
|
||||
|
||||
// U_IS_SURROGATE(uline[j]) or
|
||||
// U_IS_LEAD(uline[j]) can work too
|
||||
j += !U_IS_BMP(cp);
|
||||
}
|
||||
orig_i = distance(line.c_str(), src);
|
||||
|
||||
auto typ = ubrkiter->getRuleStatus();
|
||||
if (is_word_break(typ)) {
|
||||
auto uword = uline.tempSubStringBetween(prev, i);
|
||||
u8word.clear();
|
||||
uword.toUTF8String(u8word);
|
||||
auto word = string_view(line).substr(
|
||||
orig_prev, orig_i - orig_prev);
|
||||
process_word_other_encoding(
|
||||
mode, dic, word, u8word, orig_prev, wrong_words,
|
||||
suggestions, out, ucnv, uerr);
|
||||
}
|
||||
}
|
||||
finish_line(mode, line, wrong_words, out);
|
||||
assert(U_SUCCESS(uerr));
|
||||
}
|
||||
|
||||
auto unicode_segentation_loop(istream& in, ostream& out, const Dictionary& dic,
|
||||
Mode mode, UConverter* ucnv, UErrorCode& uerr)
|
||||
{
|
||||
auto line = string();
|
||||
auto suggestions = vector<string>();
|
||||
auto wrong_words = vector<string_view>();
|
||||
|
||||
// TODO: try to use Locale constructed from dictionary name.
|
||||
auto ubrkiter = unique_ptr<icu::BreakIterator>(
|
||||
icu::BreakIterator::createWordInstance(icu::Locale(), uerr));
|
||||
auto utext = icu::LocalUTextPointer(
|
||||
utext_openUTF8(nullptr, line.data(), line.size(), &uerr));
|
||||
auto uline = icu::UnicodeString();
|
||||
auto u8word = string();
|
||||
auto is_utf8 = ucnv_getType(ucnv) == UCNV_UTF8;
|
||||
|
||||
while (getline(in, line)) {
|
||||
wrong_words.clear();
|
||||
if (is_utf8)
|
||||
segment_line_utf8(mode, dic, line, utext.getAlias(),
|
||||
ubrkiter.get(), uerr, suggestions,
|
||||
wrong_words, out);
|
||||
else
|
||||
segment_line_generic(mode, dic, line, uline, ucnv,
|
||||
ubrkiter.get(), uerr, u8word,
|
||||
suggestions, wrong_words, out);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
// May speed up I/O. After this, don't use C printf, scanf etc.
|
||||
ios_base::sync_with_stdio(false);
|
||||
|
||||
auto args = Args_t(argc, argv);
|
||||
switch (args.mode) {
|
||||
case HELP_MODE:
|
||||
print_help(args.program_name);
|
||||
return 0;
|
||||
case VERSION_MODE:
|
||||
print_version();
|
||||
return 0;
|
||||
case ERROR_MODE:
|
||||
cerr << "Invalid (combination of) arguments, try '"
|
||||
<< args.program_name << " --help' for more information\n";
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
auto f = Dict_Finder_For_CLI_Tool();
|
||||
if (args.mode == LIST_DICTIONARIES_MODE) {
|
||||
list_dictionaries(f);
|
||||
return 0;
|
||||
}
|
||||
char* loc_str = nullptr;
|
||||
#ifdef _WIN32
|
||||
loc_str = setlocale(LC_CTYPE, nullptr); // will return "C"
|
||||
|
||||
/* On Windows, the console is a buggy thing. If the default C locale is
|
||||
active, then the encoding of the strings gotten from C or C++ stdio
|
||||
(fgets, scanf, cin) is GetConsoleCP(). Stdout accessed via standard
|
||||
functions (printf, cout) expects encoding of GetConsoleOutputCP() which
|
||||
is the same as GetConsoleCP() unless manually changed. By default both
|
||||
are the active OEM encoding, unless changed with the command chcp, or by
|
||||
calling the Set functions.
|
||||
|
||||
If we call setlocale(LC_CTYPE, ""), or let's say setlocale(LC_CTYPE,
|
||||
".1251"), then stdin will still return in the encoding GetConsoleCP(),
|
||||
but stdout functions like printf now will expect a different encoding,
|
||||
the one set via setlocale. Because of this mess don't change locale with
|
||||
setlocale on Windows.
|
||||
|
||||
When stdin or stout are redirected from/to file or another terminal like
|
||||
the one in MSYS2, they are read/written as-is. Then we will assume UTF-8
|
||||
encoding. */
|
||||
#else
|
||||
loc_str = setlocale(LC_CTYPE, "");
|
||||
if (!loc_str) {
|
||||
clog << "WARNING: Invalid locale string, fall back to \"C\".\n";
|
||||
loc_str = setlocale(LC_CTYPE, nullptr); // will return "C"
|
||||
}
|
||||
#endif
|
||||
auto loc_str_sv = string_view(loc_str);
|
||||
if (args.encoding.empty()) {
|
||||
#if _POSIX_VERSION
|
||||
auto enc_str = nl_langinfo(CODESET);
|
||||
args.encoding = enc_str;
|
||||
#elif _WIN32
|
||||
if (_isatty(_fileno(stdin)) || _isatty(_fileno(stdout)))
|
||||
args.encoding = "cp" + to_string(GetConsoleCP());
|
||||
else
|
||||
args.encoding = "UTF-8";
|
||||
#endif
|
||||
}
|
||||
clog << "INFO: Locale LC_CTYPE=" << loc_str_sv
|
||||
<< ", Used encoding=" << args.encoding << '\n';
|
||||
if (args.dictionary.empty()) {
|
||||
// infer dictionary from locale
|
||||
auto idx = min(loc_str_sv.find('.'), loc_str_sv.find('@'));
|
||||
args.dictionary = loc_str_sv.substr(0, idx);
|
||||
}
|
||||
if (args.dictionary.empty()) {
|
||||
cerr << "No dictionary provided and can not infer from OS "
|
||||
"locale\n";
|
||||
}
|
||||
auto filename = f.get_dictionary_path(args.dictionary);
|
||||
if (filename.empty()) {
|
||||
cerr << "Dictionary " << args.dictionary << " not found\n";
|
||||
return 1;
|
||||
}
|
||||
clog << "INFO: Pointed dictionary " << filename << ".{dic,aff}\n";
|
||||
auto dic = Dictionary();
|
||||
try {
|
||||
dic = Dictionary::load_from_path(filename);
|
||||
}
|
||||
catch (const Dictionary_Loading_Error& e) {
|
||||
cerr << e.what() << '\n';
|
||||
return 1;
|
||||
}
|
||||
// ICU reports all types of errors, logic errors and runtime errors
|
||||
// using this enum. We should not check for logic errors, they should
|
||||
// not happend. Optionally, only assert that they are not there can be
|
||||
// used. We should check for runtime errors.
|
||||
// The encoding conversion is a common case where runtime error can
|
||||
// happen, but by default ICU uses Unicode replacement character on
|
||||
// errors and reprots success. This can be changed, but there is no need
|
||||
// for that.
|
||||
auto uerr = U_ZERO_ERROR;
|
||||
auto enc_cstr = args.encoding.c_str();
|
||||
if (args.encoding.empty()) {
|
||||
enc_cstr = nullptr;
|
||||
clog << "WARNING: using default ICU encoding converter for IO"
|
||||
<< endl;
|
||||
}
|
||||
auto ucnv = icu::LocalUConverterPointer(ucnv_open(enc_cstr, &uerr));
|
||||
if (U_FAILURE(uerr)) {
|
||||
cerr << "ERROR: Invalid encoding " << args.encoding << ".\n";
|
||||
return 1;
|
||||
}
|
||||
auto loop_function = unicode_segentation_loop;
|
||||
if (args.whitespace_segmentation)
|
||||
loop_function = whitespace_segmentation_loop;
|
||||
if (args.files.empty()) {
|
||||
loop_function(cin, cout, dic, args.mode, ucnv.getAlias(), uerr);
|
||||
}
|
||||
else {
|
||||
for (auto& file_name : args.files) {
|
||||
ifstream in(file_name);
|
||||
if (!in.is_open()) {
|
||||
cerr << "Can't open " << file_name << '\n';
|
||||
return 1;
|
||||
}
|
||||
loop_function(in, cout, dic, args.mode, ucnv.getAlias(),
|
||||
uerr);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
1
app/src/main/icu4c
Submodule
1
app/src/main/icu4c
Submodule
Submodule app/src/main/icu4c added at c434a473c5
@@ -23,9 +23,13 @@ import android.content.Intent
|
||||
import android.content.IntentFilter
|
||||
import android.os.Build
|
||||
import androidx.core.os.UserManagerCompat
|
||||
import dev.patrickgold.florisboard.common.NativeStr
|
||||
import dev.patrickgold.florisboard.common.toNativeStr
|
||||
import dev.patrickgold.florisboard.crashutility.CrashUtility
|
||||
import dev.patrickgold.florisboard.debug.Flog
|
||||
import dev.patrickgold.florisboard.debug.LogTopic
|
||||
import dev.patrickgold.florisboard.debug.flogError
|
||||
import dev.patrickgold.florisboard.debug.flogInfo
|
||||
import dev.patrickgold.florisboard.ime.core.Preferences
|
||||
import dev.patrickgold.florisboard.ime.core.SubtypeManager
|
||||
import dev.patrickgold.florisboard.ime.dictionary.DictionaryManager
|
||||
@@ -34,12 +38,16 @@ import dev.patrickgold.florisboard.ime.theme.ThemeManager
|
||||
import dev.patrickgold.florisboard.res.AssetManager
|
||||
import dev.patrickgold.florisboard.res.FlorisRef
|
||||
import timber.log.Timber
|
||||
import java.lang.Exception
|
||||
import java.lang.ref.WeakReference
|
||||
import java.io.File
|
||||
import kotlin.Exception
|
||||
|
||||
@Suppress("unused")
|
||||
class FlorisApplication : Application() {
|
||||
companion object {
|
||||
private const val ICU_DATA_ASSET_PATH = "icu/icudt69l.dat"
|
||||
|
||||
private external fun nativeInitICUData(path: NativeStr): Int
|
||||
|
||||
init {
|
||||
try {
|
||||
System.loadLibrary("florisboard-native")
|
||||
@@ -61,6 +69,7 @@ class FlorisApplication : Application() {
|
||||
flogLevels = Flog.LEVEL_ALL,
|
||||
flogOutputs = Flog.OUTPUT_CONSOLE
|
||||
)
|
||||
initICU()
|
||||
CrashUtility.install(this)
|
||||
val prefs = Preferences.initDefault(this)
|
||||
val assetManager = AssetManager.init(this)
|
||||
@@ -80,6 +89,32 @@ class FlorisApplication : Application() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Copies the bundled ICU data asset into the cache directory and hands the
 * resulting file path to the native ICU initializer.
 *
 * On API level N and above the device protected storage context is used,
 * otherwise the application itself serves as the context.
 *
 * @return `true` if the native initializer reported status 0, `false` if it
 *  reported another status, the assets are unavailable or an exception was
 *  thrown along the way.
 */
fun initICU(): Boolean {
    return try {
        val storageContext = when {
            Build.VERSION.SDK_INT >= Build.VERSION_CODES.N -> createDeviceProtectedStorageContext()
            else -> this
        }
        val assets = storageContext.assets ?: return false
        val icuDataFile = File(storageContext.cacheDir, "icudt.dat")
        // The destination is opened first, then the asset is streamed into it.
        icuDataFile.outputStream().use { output ->
            assets.open(ICU_DATA_ASSET_PATH).use { input -> input.copyTo(output) }
        }
        val status = nativeInitICUData(icuDataFile.absolutePath.toNativeStr())
        if (status == 0) {
            flogInfo { "Successfully loaded ICU data!" }
            true
        } else {
            flogError { "Native ICU data initializing failed with error code $status!" }
            false
        }
    } catch (e: Exception) {
        flogError { e.toString() }
        false
    }
}
|
||||
|
||||
fun init() {
|
||||
CrashUtility.install(this)
|
||||
val prefs = Preferences.initDefault(this)
|
||||
|
||||
@@ -17,23 +17,26 @@
|
||||
package dev.patrickgold.florisboard
|
||||
|
||||
import android.service.textservice.SpellCheckerService
|
||||
import android.util.LruCache
|
||||
import android.view.textservice.SentenceSuggestionsInfo
|
||||
import android.view.textservice.SuggestionsInfo
|
||||
import android.view.textservice.TextInfo
|
||||
import dev.patrickgold.florisboard.common.FlorisLocale
|
||||
import dev.patrickgold.florisboard.debug.LogTopic
|
||||
import dev.patrickgold.florisboard.debug.flogInfo
|
||||
import dev.patrickgold.florisboard.ime.core.Preferences
|
||||
import dev.patrickgold.florisboard.ime.core.Subtype
|
||||
import dev.patrickgold.florisboard.ime.core.SubtypeManager
|
||||
import dev.patrickgold.florisboard.ime.dictionary.DictionaryManager
|
||||
import dev.patrickgold.florisboard.ime.spelling.SpellingDict
|
||||
import dev.patrickgold.florisboard.ime.spelling.SpellingManager
|
||||
import java.util.*
|
||||
import kotlin.contracts.InvocationKind
|
||||
import kotlin.contracts.contract
|
||||
import dev.patrickgold.florisboard.ime.spelling.SpellingService
|
||||
import kotlinx.coroutines.runBlocking
|
||||
|
||||
class FlorisSpellCheckerService : SpellCheckerService() {
|
||||
companion object {
|
||||
private const val USE_FLORIS_SUBTYPES_LOCALE: String = "zz"
|
||||
}
|
||||
|
||||
private val dictionaryManager get() = DictionaryManager.default()
|
||||
private val spellingService: SpellingService = SpellingService.globalInstance()
|
||||
private val subtypeManager get() = SubtypeManager.default()
|
||||
|
||||
override fun onCreate() {
|
||||
flogInfo(LogTopic.SPELL_EVENTS)
|
||||
@@ -54,75 +57,87 @@ class FlorisSpellCheckerService : SpellCheckerService() {
|
||||
super.onDestroy()
|
||||
}
|
||||
|
||||
private class SuggestionsCache(size: Int) {
|
||||
val suggestionsInfoCache: LruCache<String, SuggestionsInfo> = LruCache(size)
|
||||
|
||||
inline fun getOrGenerate(word: String, generator: (w: String) -> SuggestionsInfo): SuggestionsInfo {
|
||||
contract {
|
||||
callsInPlace(generator, InvocationKind.AT_MOST_ONCE)
|
||||
}
|
||||
val cachedSuggestionsInfo = suggestionsInfoCache.get(word)
|
||||
if (cachedSuggestionsInfo != null) {
|
||||
return cachedSuggestionsInfo
|
||||
}
|
||||
val newSuggestionsInfo = generator(word)
|
||||
suggestionsInfoCache.put(word, newSuggestionsInfo)
|
||||
return newSuggestionsInfo
|
||||
}
|
||||
}
|
||||
|
||||
private class FlorisSpellCheckerSession : Session() {
|
||||
companion object {
|
||||
private const val USE_FLORIS_SUBTYPES_LOCALE: String = "zz"
|
||||
private const val SUGGESTIONS_MAX_SIZE = 50
|
||||
|
||||
private val EMPTY_STRING_ARRAY: Array<out String> = arrayOf()
|
||||
}
|
||||
|
||||
private val prefs get() = Preferences.default()
|
||||
private val dictionaryManager get() = DictionaryManager.default()
|
||||
private val spellingManager get() = SpellingManager.default()
|
||||
private val subtypeManager get() = SubtypeManager.default()
|
||||
|
||||
private var spellingDict: SpellingDict? = null
|
||||
private lateinit var spellingLocale: Locale
|
||||
private val suggestionsCache = SuggestionsCache(SUGGESTIONS_MAX_SIZE)
|
||||
private inner class FlorisSpellCheckerSession : Session() {
|
||||
private var cachedSpellingLocale: FlorisLocale? = null
|
||||
|
||||
override fun onCreate() {
|
||||
flogInfo(LogTopic.SPELL_EVENTS) { "Session locale: $locale" }
|
||||
|
||||
spellingLocale = when (locale) {
|
||||
setupSpellingIfNecessary()
|
||||
}
|
||||
|
||||
private fun setupSpellingIfNecessary() {
|
||||
val evaluatedLocale = when (locale) {
|
||||
null -> Subtype.DEFAULT.locale
|
||||
USE_FLORIS_SUBTYPES_LOCALE -> (subtypeManager.getActiveSubtype() ?: Subtype.DEFAULT).locale
|
||||
else -> Locale(locale)
|
||||
else -> FlorisLocale.from(locale)
|
||||
}
|
||||
|
||||
spellingDict = spellingManager.getSpellingDict(spellingLocale)
|
||||
if (evaluatedLocale != cachedSpellingLocale) {
|
||||
cachedSpellingLocale = evaluatedLocale
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Spell checks all given text infos, blocking until every result is there.
 *
 * All asynchronous spell requests are started first and only then awaited
 * in input order. Each result gets the cookie and sequence of the text info
 * it belongs to. A missing text is checked as an empty string.
 *
 * @param spellingLocale The locale passed to the spelling service.
 * @param textInfos The text infos to check.
 * @param suggestionsLimit The suggestions limit passed to the spelling service.
 *
 * @return One suggestions info per text info, in the same order.
 */
private fun spellMultiple(
    spellingLocale: FlorisLocale,
    textInfos: Array<out TextInfo>,
    suggestionsLimit: Int
): Array<SuggestionsInfo> = runBlocking {
    val pendingResults = textInfos.map { textInfo ->
        spellingService.spellAsync(spellingLocale, textInfo.text ?: "", suggestionsLimit)
    }
    Array(textInfos.size) { index ->
        val source = textInfos[index]
        pendingResults[index].await().apply {
            setCookieAndSequence(source.cookie, source.sequence)
        }
    }
}
|
||||
|
||||
override fun onGetSuggestions(textInfo: TextInfo?, suggestionsLimit: Int): SuggestionsInfo {
|
||||
flogInfo(LogTopic.SPELL_EVENTS) { "text=${textInfo?.text}, limit=$suggestionsLimit"}
|
||||
flogInfo(LogTopic.SPELL_EVENTS) { "text=${textInfo?.text}, limit=$suggestionsLimit" }
|
||||
|
||||
val spellingDict = spellingDict ?: return SuggestionsInfo(0, EMPTY_STRING_ARRAY)
|
||||
val word = textInfo?.text ?: return SuggestionsInfo(0, EMPTY_STRING_ARRAY)
|
||||
textInfo?.text ?: return SpellingService.emptySuggestionsInfo()
|
||||
setupSpellingIfNecessary()
|
||||
val spellingLocale = cachedSpellingLocale ?: return SpellingService.emptySuggestionsInfo()
|
||||
|
||||
return suggestionsCache.getOrGenerate(word) {
|
||||
var isWordOk = false
|
||||
if (prefs.spelling.useUdmEntries) {
|
||||
isWordOk = dictionaryManager.spell(word, spellingLocale)
|
||||
}
|
||||
return@getOrGenerate if (isWordOk) {
|
||||
SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, EMPTY_STRING_ARRAY)
|
||||
} else {
|
||||
isWordOk = spellingDict.spell(word)
|
||||
if (isWordOk) {
|
||||
SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, EMPTY_STRING_ARRAY)
|
||||
} else {
|
||||
val suggestions = spellingDict.suggest(word, suggestionsLimit)
|
||||
SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO, suggestions)
|
||||
}
|
||||
}
|
||||
}
|
||||
return spellingService.spell(spellingLocale, textInfo.text, suggestionsLimit)
|
||||
}
|
||||
|
||||
/**
 * Logs the event and spell checks all given text infos.
 *
 * @return The results of [spellMultiple], or an empty array if [textInfos]
 *  is null or no spelling locale is cached after the setup call.
 */
override fun onGetSuggestionsMultiple(
    textInfos: Array<out TextInfo>?,
    suggestionsLimit: Int,
    sequentialWords: Boolean
): Array<SuggestionsInfo> {
    flogInfo(LogTopic.SPELL_EVENTS)

    if (textInfos == null) return emptyArray()
    setupSpellingIfNecessary()

    return cachedSpellingLocale
        ?.let { activeLocale -> spellMultiple(activeLocale, textInfos, suggestionsLimit) }
        ?: emptyArray()
}
|
||||
|
||||
/**
 * Logs the event and delegates to the default implementation of the
 * super class.
 */
override fun onGetSentenceSuggestionsMultiple(
    textInfos: Array<out TextInfo>?,
    suggestionsLimit: Int
): Array<SentenceSuggestionsInfo> {
    flogInfo(LogTopic.SPELL_EVENTS)

    // TODO: implement custom solution here instead of calling the default implementation
    val defaultResult = super.onGetSentenceSuggestionsMultiple(textInfos, suggestionsLimit)
    return defaultResult
}
|
||||
|
||||
/**
 * Logs the cancel request under [LogTopic.SPELL_EVENTS], then defers to the superclass.
 */
override fun onCancel() {
    flogInfo(LogTopic.SPELL_EVENTS)

    super.onCancel()
}
|
||||
|
||||
/**
 * Logs the close request under [LogTopic.SPELL_EVENTS], then defers to the superclass.
 */
override fun onClose() {
    flogInfo(LogTopic.SPELL_EVENTS)

    super.onClose()
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.common
|
||||
|
||||
import kotlinx.serialization.KSerializer
|
||||
import kotlinx.serialization.Serializable
|
||||
import kotlinx.serialization.descriptors.PrimitiveKind
|
||||
import kotlinx.serialization.descriptors.PrimitiveSerialDescriptor
|
||||
import kotlinx.serialization.descriptors.SerialDescriptor
|
||||
import kotlinx.serialization.encoding.Decoder
|
||||
import kotlinx.serialization.encoding.Encoder
|
||||
import java.util.*
|
||||
|
||||
/**
 * Project-specific locale class wrapping [java.util.Locale]. The wrapping is
 * necessary to provide consistent language display names and tags across the
 * whole code base.
 *
 * This class would be ideal for Kotlin's value classes, though AndroidX.Room
 * does not like this at all, so this is a "normal" class.
 *
 * To construct a FlorisLocale, use one of the many from() methods provided.
 *
 * @property base The wrapped [java.util.Locale].
 *
 * @see java.util.Locale
 */
@Serializable(with = FlorisLocale.Serializer::class)
class FlorisLocale private constructor(val base: Locale) {
    companion object {
        /** Delimiter for a language tag. */
        private const val DELIMITER_LANGUAGE_TAG = '-'
        /** Delimiter for a locale tag. */
        private const val DELIMITER_LOCALE_TAG = '_'

        /** Delimiter regex to split language/locale tags. */
        private val DELIMITER_SPLITTER = """[${DELIMITER_LANGUAGE_TAG}${DELIMITER_LOCALE_TAG}]""".toRegex()

        /** Constant locale for ROOT */
        val ROOT = from("", "", "")

        /** Constant locale for ENGLISH */
        val ENGLISH = from("en", "", "")

        /**
         * Wraps a [java.util.Locale] and returns the [FlorisLocale].
         *
         * @return The wrapped locale.
         */
        fun from(javaLocale: Locale): FlorisLocale = FlorisLocale(javaLocale)

        /**
         * Constructs a new [FlorisLocale] with given [language].
         *
         * @param language A two-letter language code.
         *
         * @return A new [FlorisLocale].
         */
        fun from(language: String): FlorisLocale = from(Locale(language))

        /**
         * Constructs a new [FlorisLocale] with given [language] and [country].
         *
         * @param language A two-letter language code.
         * @param country A two-letter country code.
         *
         * @return A new [FlorisLocale].
         */
        fun from(language: String, country: String): FlorisLocale = from(Locale(language, country))

        /**
         * Constructs a new [FlorisLocale] with given [language], [country] and [variant].
         *
         * @param language A two-letter language code.
         * @param country A two-letter country code.
         * @param variant A variant code.
         *
         * @return A new [FlorisLocale].
         */
        fun from(language: String, country: String, variant: String): FlorisLocale =
            from(Locale(language, country, variant))

        /**
         * Constructs a new [FlorisLocale] from given [str].
         *
         * The tag is split on both `-` and `_`. The first three components are
         * interpreted as language, country and variant, any further components
         * are ignored. An empty component (as in `xx--zzz`) stands for a
         * component which is not specified.
         *
         * @param str Either a language or locale tag in string form.
         *
         * @return A new [FlorisLocale].
         */
        fun fromTag(str: String): FlorisLocale {
            val components = str.split(DELIMITER_SPLITTER)
            return when (components.size) {
                // No delimiter present, the whole string is the language
                1 -> from(components[0])
                2 -> from(components[0], components[1])
                else -> from(components[0], components[1], components[2])
            }
        }

        /**
         * Gets the current value of the default locale for this instance of
         * the Java Virtual Machine.
         *
         * @see java.util.Locale.getDefault
         */
        fun default(): FlorisLocale = FlorisLocale(Locale.getDefault())
    }

    /**
     * Builds a locale or language tag for this locale by using [delimiter].
     *
     * If a variant is set together with a language but without a country, the
     * delimiter is written twice (e.g. `xx__zzz`), leaving the country slot
     * empty. Without this the language and variant would be concatenated into
     * one component and [fromTag] could not separate them again.
     *
     * @param delimiter The delimiter to use between the components.
     *
     * @return The generated tag for this locale. May be an empty string if
     *  [language], [country] and [variant] are not specified.
     */
    private fun buildLocaleString(delimiter: Char): String = stringBuilder {
        val language = base.language
        val country = base.country
        val variant = base.variant
        append(language)
        if (language.isNotBlank() && country.isNotBlank()) {
            append(delimiter)
        }
        append(country)
        if (variant.isNotBlank()) {
            if (country.isNotBlank()) {
                append(delimiter)
            } else if (language.isNotBlank()) {
                // Keep an empty country slot so that the variant stays the third component
                append(delimiter)
                append(delimiter)
            }
        }
        append(variant)
    }

    /**
     * Returns the language code of this locale.
     *
     * @see java.util.Locale.getLanguage
     */
    val language: String get() = base.language

    /**
     * Returns the country/region code for this locale.
     *
     * @see java.util.Locale.getCountry
     */
    val country: String get() = base.country

    /**
     * Returns the variant code for this locale.
     *
     * @see java.util.Locale.getVariant
     */
    val variant: String get() = base.variant

    /**
     * Returns a three-letter abbreviation of this locale's language.
     *
     * @see java.util.Locale.getISO3Language
     */
    val iso3Language: String get() = base.isO3Language

    /**
     * Returns a three-letter abbreviation of this locale's country.
     *
     * @see java.util.Locale.getISO3Country
     */
    val iso3Country: String get() = base.isO3Country

    /**
     * Generates the language tag for this locale in the format `xx`,
     * `xx-YY` or `xx-YY-zzz` and returns it as a string. If a variant is
     * set without a country, the format is `xx--zzz`.
     *
     * xx:  Language code
     * YY:  Country code
     * zzz: Variant code
     *
     * @return The language tag for this locale. May be an empty string if
     *  [language], [country] and [variant] are not specified.
     */
    fun languageTag(): String = buildLocaleString(DELIMITER_LANGUAGE_TAG)

    /**
     * Generates the locale tag for this locale in the format `xx`,
     * `xx_YY` or `xx_YY_zzz` and returns it as a string. If a variant is
     * set without a country, the format is `xx__zzz`.
     *
     * xx:  Language code
     * YY:  Country code
     * zzz: Variant code
     *
     * @return The locale tag for this locale. May be an empty string if
     *  [language], [country] and [variant] are not specified.
     */
    fun localeTag(): String = buildLocaleString(DELIMITER_LOCALE_TAG)

    /**
     * Returns the name of this locale's language, localized to [locale].
     *
     * @see java.util.Locale.getDisplayLanguage
     */
    fun displayLanguage(locale: FlorisLocale = default()): String = base.getDisplayLanguage(locale.base)

    /**
     * Returns the name of this locale's country, localized to [locale].
     *
     * @see java.util.Locale.getDisplayCountry
     */
    fun displayCountry(locale: FlorisLocale = default()): String = base.getDisplayCountry(locale.base)

    /**
     * Returns a name for the locale's variant code that is appropriate for
     * display to the user.
     *
     * @see java.util.Locale.getDisplayVariant
     */
    fun displayVariant(locale: FlorisLocale = default()): String = base.getDisplayVariant(locale.base)

    /**
     * Returns the display name for this locale, localized to [locale] in
     * the format `Language`, `Language (Country)` or `Language (Country) \[VARIANT]`.
     * A component without a display name falls back to its raw code.
     *
     * @param locale The locale to use for generating the display name for
     *  this locale, or [default] if otherwise.
     *
     * @return The display name for this locale. May be an empty string if
     *  [language], [country] and [variant] are not specified.
     */
    fun displayName(locale: FlorisLocale = default()): String = stringBuilder {
        val languageName = displayLanguage(locale).ifBlank { base.language }
        val countryName = displayCountry(locale).ifBlank { base.country }
        val variantName = displayVariant(locale).ifBlank { base.variant }
        append(languageName)
        if (countryName.isNotBlank()) {
            if (languageName.isNotBlank()) {
                append(' ')
            }
            append('(')
            append(countryName)
            append(')')
        }
        if (variantName.isNotBlank()) {
            if (languageName.isNotBlank() || countryName.isNotBlank()) {
                append(' ')
            }
            append('[')
            append(variantName.uppercase())
            append(']')
        }
    }

    /**
     * Generate a debug string representing this locale. Not to be confused
     * with [java.util.Locale.toString], which produces a locale tag. If such
     * tag is needed, use [localeTag].
     *
     * @return The debug representation of this locale.
     */
    override fun toString() = "FlorisLocale { l=${base.language} c=${base.country} v=${base.variant} }"

    /**
     * Equality check for this locale. Two locales are equal if their wrapped
     * [base] locales are equal.
     */
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        return other is FlorisLocale && base == other.base
    }

    /**
     * Returns the hash code for this locale, which is the hash code of [base].
     */
    override fun hashCode(): Int {
        return base.hashCode()
    }

    /**
     * The JSON (de)serializer for FlorisLocale. A locale is encoded as the
     * string returned by [languageTag] and decoded with [fromTag].
     */
    class Serializer : KSerializer<FlorisLocale> {
        override val descriptor: SerialDescriptor =
            PrimitiveSerialDescriptor("FlorisLocale", PrimitiveKind.STRING)

        override fun serialize(encoder: Encoder, value: FlorisLocale) {
            encoder.encodeString(value.languageTag())
        }

        override fun deserialize(decoder: Decoder): FlorisLocale {
            return fromTag(decoder.decodeString())
        }
    }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.common
|
||||
|
||||
import kotlin.contracts.InvocationKind
|
||||
import kotlin.contracts.contract
|
||||
|
||||
/**
 * Evaluates [value] exactly once and wraps what it returns in a successful [Result].
 *
 * Nothing is caught here: an exception thrown by [value] propagates to the caller.
 *
 * @param value Producer of the success value.
 *
 * @return A successful [Result] holding the produced value.
 */
inline fun <T> resultOk(value: () -> T): Result<T> {
    contract {
        callsInPlace(value, InvocationKind.EXACTLY_ONCE)
    }
    val produced = value()
    return Result.success(produced)
}
|
||||
|
||||
/**
 * Evaluates [error] exactly once and wraps the returned throwable in a failed [Result].
 *
 * @param error Producer of the throwable describing the failure.
 *
 * @return A failed [Result] holding the produced throwable.
 */
inline fun <T> resultErr(error: () -> Throwable): Result<T> {
    contract {
        callsInPlace(error, InvocationKind.EXACTLY_ONCE)
    }
    val cause = error()
    return Result.failure(cause)
}
|
||||
|
||||
/**
 * Evaluates [error] exactly once and wraps the returned message in an [Exception],
 * which in turn is wrapped in a failed [Result].
 *
 * @param error Producer of the failure message.
 *
 * @return A failed [Result] holding an [Exception] with the produced message.
 */
inline fun <T> resultErrStr(error: () -> String): Result<T> {
    contract {
        callsInPlace(error, InvocationKind.EXACTLY_ONCE)
    }
    val message = error()
    return Result.failure(Exception(message))
}
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
@file:Suppress("NOTHING_TO_INLINE")
|
||||
|
||||
package dev.patrickgold.florisboard.common
|
||||
|
||||
import kotlin.contracts.InvocationKind
|
||||
import kotlin.contracts.contract
|
||||
|
||||
/**
 * Runs [builder] exactly once on a fresh, empty [StringBuilder] and returns the text
 * accumulated in it. The value returned by [builder] itself is discarded.
 *
 * @param builder The block which appends to the [StringBuilder] receiver.
 *
 * @return The content of the [StringBuilder] after [builder] has run.
 */
inline fun <R> stringBuilder(builder: StringBuilder.() -> R): String {
    contract {
        callsInPlace(builder, InvocationKind.EXACTLY_ONCE)
    }
    val buffer = StringBuilder()
    buffer.builder()
    return buffer.toString()
}
|
||||
|
||||
/**
 * Converts this string to lower case using the [java.util.Locale] wrapped by [locale].
 */
inline fun String.lowercase(locale: FlorisLocale): String {
    val javaLocale = locale.base
    return this.lowercase(javaLocale)
}
|
||||
|
||||
/**
 * Converts this string to upper case using the [java.util.Locale] wrapped by [locale].
 */
inline fun String.uppercase(locale: FlorisLocale): String {
    val javaLocale = locale.base
    return this.uppercase(javaLocale)
}
|
||||
@@ -37,10 +37,13 @@ object LogTopic {
|
||||
const val TEXT_KEYBOARD_VIEW: FlogTopic = 16u
|
||||
const val GESTURES: FlogTopic = 32u
|
||||
const val SMARTBAR: FlogTopic = 64u
|
||||
const val THEME_MANAGER: FlogTopic = 128u
|
||||
const val ASSET_MANAGER: FlogTopic = 256u
|
||||
|
||||
const val GLIDE: FlogTopic = 512u
|
||||
const val CLIPBOARD: FlogTopic = 1024u
|
||||
const val CRASH_UTILITY: FlogTopic = 2048u
|
||||
|
||||
const val SPELL_EVENTS: FlogTopic = 4096u
|
||||
const val EDITOR_INSTANCE: FlogTopic = 0x00_00_20_00u
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ import dev.patrickgold.florisboard.ime.clip.provider.ClipboardItem
|
||||
import dev.patrickgold.florisboard.ime.core.FlorisBoard
|
||||
import dev.patrickgold.florisboard.ime.core.InputKeyEvent
|
||||
import dev.patrickgold.florisboard.ime.text.key.KeyCode
|
||||
import dev.patrickgold.florisboard.ime.text.keyboard.BasicTextKeyData
|
||||
import dev.patrickgold.florisboard.ime.text.keyboard.TextKeyData
|
||||
import kotlinx.coroutines.*
|
||||
import kotlin.math.pow
|
||||
|
||||
@@ -128,15 +128,14 @@ class ClipboardInputManager private constructor() : CoroutineScope by MainScope(
|
||||
event ?: return false
|
||||
|
||||
val data = when (view.id) {
|
||||
R.id.back_to_keyboard_button -> BasicTextKeyData(code = KeyCode.SWITCH_TO_TEXT_CONTEXT)
|
||||
R.id.clear_clipboard_history -> BasicTextKeyData(code = KeyCode.CLEAR_CLIPBOARD_HISTORY)
|
||||
R.id.back_to_keyboard_button -> TextKeyData(code = KeyCode.SWITCH_TO_TEXT_CONTEXT)
|
||||
R.id.clear_clipboard_history -> TextKeyData(code = KeyCode.CLEAR_CLIPBOARD_HISTORY)
|
||||
else -> null
|
||||
} ?: return false
|
||||
|
||||
when (event.actionMasked) {
|
||||
MotionEvent.ACTION_DOWN -> {
|
||||
florisboard.keyPressVibrate()
|
||||
florisboard.keyPressSound(data)
|
||||
florisboard.inputFeedbackManager.keyPress(data)
|
||||
florisboard.textInputManager.inputEventDispatcher.send(InputKeyEvent.down(data))
|
||||
}
|
||||
MotionEvent.ACTION_UP -> {
|
||||
|
||||
@@ -3,6 +3,7 @@ package dev.patrickgold.florisboard.ime.clip
|
||||
import android.content.ClipboardManager
|
||||
import android.content.Context
|
||||
import android.content.Context.CLIPBOARD_SERVICE
|
||||
import dev.patrickgold.florisboard.debug.flogDebug
|
||||
import dev.patrickgold.florisboard.ime.clip.provider.*
|
||||
import dev.patrickgold.florisboard.ime.core.FlorisBoard
|
||||
import dev.patrickgold.florisboard.ime.core.Preferences
|
||||
@@ -13,7 +14,6 @@ import kotlinx.coroutines.MainScope
|
||||
import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.launch
|
||||
import java.io.Closeable
|
||||
import java.util.*
|
||||
import kotlin.collections.ArrayDeque
|
||||
|
||||
/**
|
||||
@@ -237,7 +237,7 @@ class FlorisClipboardManager private constructor() : ClipboardManager.OnPrimaryC
|
||||
} else if (prefs.clipboard.enableHistory) {
|
||||
// If history is enabled and it should be updated, update it
|
||||
if (shouldUpdateHistory) {
|
||||
updateHistory(ClipboardItem.fromClipData(systemPrimaryClip, false))
|
||||
updateHistory(ClipboardItem.fromClipData(systemPrimaryClip, true))
|
||||
} else {
|
||||
shouldUpdateHistory = true
|
||||
}
|
||||
@@ -293,9 +293,7 @@ class FlorisClipboardManager private constructor() : ClipboardManager.OnPrimaryC
|
||||
ClipboardInputManager.getInstance().notifyItemRangeRemoved(pins.size + history.size, numToPop)
|
||||
}
|
||||
FlorisBoard.getInstance().clipInputManager.initClipboard(this.history, this.pins)
|
||||
prefs
|
||||
cleanUpJob = launch(Dispatchers.Main) {
|
||||
|
||||
while (true) {
|
||||
cleanUpClipboard.run()
|
||||
delay(INTERVAL)
|
||||
@@ -304,7 +302,11 @@ class FlorisClipboardManager private constructor() : ClipboardManager.OnPrimaryC
|
||||
launch(Dispatchers.IO) {
|
||||
pinsDao = PinnedItemsDatabase.getInstance().clipboardItemDao()
|
||||
pinsDao.getAll().toCollection(pins)
|
||||
FlorisContentProvider.getInstance().initIfNotAlready()
|
||||
try {
|
||||
FlorisContentProvider.getInstance().initIfNotAlready()
|
||||
} catch (e: Exception) {
|
||||
e.fillInStackTrace()
|
||||
}
|
||||
}
|
||||
} catch (e : Exception) {
|
||||
e.fillInStackTrace()
|
||||
|
||||
@@ -22,7 +22,6 @@ enum class ItemType(val value: Int) {
|
||||
|
||||
/**
|
||||
* Represents an item on the clipboard.
|
||||
* The URI stored belongs to FlorisContentProvider, not whatever app copied the image
|
||||
*
|
||||
* If type == ItemType.IMAGE there must be a uri set
|
||||
* if type == ItemType.TEXT there must be a text set
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,26 +23,22 @@ import android.content.res.Configuration
|
||||
import android.graphics.Color
|
||||
import android.inputmethodservice.ExtractEditText
|
||||
import android.inputmethodservice.InputMethodService
|
||||
import android.media.AudioManager
|
||||
import android.os.Build
|
||||
import android.os.Bundle
|
||||
import android.os.Handler
|
||||
import android.os.Looper
|
||||
import android.os.VibrationEffect
|
||||
import android.os.Vibrator
|
||||
import android.util.Size
|
||||
import android.view.ContextThemeWrapper
|
||||
import android.view.Gravity
|
||||
import android.view.HapticFeedbackConstants
|
||||
import android.view.LayoutInflater
|
||||
import android.view.View
|
||||
import android.view.ViewGroup
|
||||
import android.view.ViewTreeObserver
|
||||
import android.view.WindowManager
|
||||
import android.view.inputmethod.EditorInfo
|
||||
import android.view.inputmethod.ExtractedText
|
||||
import android.view.inputmethod.InlineSuggestionsRequest
|
||||
import android.view.inputmethod.InlineSuggestionsResponse
|
||||
import android.view.inputmethod.InputConnection
|
||||
import android.view.inputmethod.InputMethodManager
|
||||
import android.widget.Button
|
||||
import android.widget.FrameLayout
|
||||
@@ -57,7 +53,6 @@ import dev.patrickgold.florisboard.crashutility.CrashUtility
|
||||
import dev.patrickgold.florisboard.debug.*
|
||||
import dev.patrickgold.florisboard.ime.clip.ClipboardInputManager
|
||||
import dev.patrickgold.florisboard.ime.clip.FlorisClipboardManager
|
||||
import dev.patrickgold.florisboard.ime.keyboard.KeyData
|
||||
import dev.patrickgold.florisboard.ime.landscapeinput.LandscapeInputUiMode
|
||||
import dev.patrickgold.florisboard.ime.media.MediaInputManager
|
||||
import dev.patrickgold.florisboard.ime.onehanded.OneHandedMode
|
||||
@@ -67,7 +62,6 @@ import dev.patrickgold.florisboard.ime.text.composing.Appender
|
||||
import dev.patrickgold.florisboard.ime.text.composing.Composer
|
||||
import dev.patrickgold.florisboard.ime.text.gestures.SwipeAction
|
||||
import dev.patrickgold.florisboard.ime.text.key.CurrencySet
|
||||
import dev.patrickgold.florisboard.ime.text.key.KeyCode
|
||||
import dev.patrickgold.florisboard.ime.text.keyboard.TextKeyData
|
||||
import dev.patrickgold.florisboard.ime.theme.Theme
|
||||
import dev.patrickgold.florisboard.ime.theme.ThemeManager
|
||||
@@ -75,6 +69,7 @@ import dev.patrickgold.florisboard.setup.SetupActivity
|
||||
import dev.patrickgold.florisboard.util.AppVersionUtils
|
||||
import dev.patrickgold.florisboard.common.ViewUtils
|
||||
import dev.patrickgold.florisboard.databinding.FlorisboardBinding
|
||||
import dev.patrickgold.florisboard.ime.keyboard.InputFeedbackManager
|
||||
import dev.patrickgold.florisboard.ime.keyboard.KeyboardState
|
||||
import dev.patrickgold.florisboard.ime.keyboard.updateKeyboardState
|
||||
import dev.patrickgold.florisboard.util.debugSummarize
|
||||
@@ -133,11 +128,10 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
private set
|
||||
private var eventListeners: CopyOnWriteArrayList<EventListener> = CopyOnWriteArrayList()
|
||||
|
||||
private var audioManager: AudioManager? = null
|
||||
var imeManager: InputMethodManager? = null
|
||||
lateinit var inputFeedbackManager: InputFeedbackManager
|
||||
var florisClipboardManager: FlorisClipboardManager? = null
|
||||
private val themeManager: ThemeManager = ThemeManager.default()
|
||||
private var vibrator: Vibrator? = null
|
||||
|
||||
private var internalBatchNestingLevel: Int = 0
|
||||
private val internalSelectionCache = object {
|
||||
@@ -150,7 +144,7 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
var candidatesEnd: Int = -1
|
||||
}
|
||||
|
||||
var activeEditorInstance: EditorInstance = EditorInstance.default()
|
||||
lateinit var activeEditorInstance: EditorInstance
|
||||
|
||||
val subtypeManager: SubtypeManager get() = SubtypeManager.default()
|
||||
val composer: Composer get() = subtypeManager.imeConfig.composerFromName.getValue(activeSubtype.composerName)
|
||||
@@ -219,10 +213,10 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
flogInfo(LogTopic.IMS_EVENTS)
|
||||
serviceLifecycleDispatcher.onServicePreSuperOnCreate()
|
||||
|
||||
activeEditorInstance = EditorInstance(this, activeState)
|
||||
|
||||
imeManager = getSystemService(Context.INPUT_METHOD_SERVICE) as? InputMethodManager
|
||||
audioManager = getSystemService(Context.AUDIO_SERVICE) as? AudioManager
|
||||
vibrator = getSystemService(Context.VIBRATOR_SERVICE) as? Vibrator
|
||||
prefs.syncSystemSettings()
|
||||
inputFeedbackManager = InputFeedbackManager.new(this)
|
||||
activeSubtype = subtypeManager.getActiveSubtype() ?: Subtype.DEFAULT
|
||||
|
||||
currentThemeIsNight = themeManager.activeTheme.isNightTheme
|
||||
@@ -320,16 +314,15 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
it.close()
|
||||
florisClipboardManager = null
|
||||
}
|
||||
audioManager = null
|
||||
imeManager = null
|
||||
vibrator = null
|
||||
popupLayerView = null
|
||||
uiBinding = null
|
||||
florisboardInstance = null
|
||||
|
||||
eventListeners.toList().forEach { it?.onDestroy() }
|
||||
eventListeners.clear()
|
||||
super.onDestroy()
|
||||
|
||||
florisboardInstance = null
|
||||
}
|
||||
|
||||
override fun onEvaluateFullscreenMode(): Boolean {
|
||||
@@ -351,6 +344,15 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
updateSoftInputWindowLayoutParameters()
|
||||
}
|
||||
|
||||
/**
 * Lets the superclass process the extracted text update first, then passes the same
 * [token] and [text] on to the active editor instance.
 */
override fun onUpdateExtractedText(token: Int, text: ExtractedText?) {
    super.onUpdateExtractedText(token, text)

    activeEditorInstance.updateText(token, text)
}
|
||||
|
||||
/**
 * Delegates to the superclass implementation without adding any behavior of its own.
 */
override fun onUpdateExtractingViews(ei: EditorInfo?) {
    super.onUpdateExtractingViews(ei)
}
|
||||
|
||||
override fun onUpdateExtractingVisibility(ei: EditorInfo?) {
|
||||
isExtractViewShown = activeState.isRichInputEditor && when (prefs.keyboard.landscapeInputUiMode) {
|
||||
LandscapeInputUiMode.DYNAMICALLY_SHOW -> !activeState.imeOptions.flagNoExtractUi
|
||||
@@ -359,6 +361,10 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Forwards the bind-input callback to the active editor instance. The superclass
 * implementation is not invoked here.
 */
override fun onBindInput() {
    activeEditorInstance.bindInput()
}
|
||||
|
||||
override fun onStartInput(attribute: EditorInfo?, restarting: Boolean) {
|
||||
flogInfo(LogTopic.IMS_EVENTS)
|
||||
|
||||
@@ -368,7 +374,7 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
} else {
|
||||
ResponseState.RESET
|
||||
}
|
||||
currentInputConnection?.requestCursorUpdates(InputConnection.CURSOR_UPDATE_MONITOR)
|
||||
activeEditorInstance.startInput(attribute)
|
||||
}
|
||||
|
||||
override fun onStartInputView(info: EditorInfo?, restarting: Boolean) {
|
||||
@@ -380,8 +386,8 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
activeState.update(info)
|
||||
activeState.isSelectionMode = (info.initialSelEnd - info.initialSelStart) != 0
|
||||
}
|
||||
activeEditorInstance = EditorInstance.from(info, this, activeState)
|
||||
themeManager.updateRemoteColorValues(activeEditorInstance.packageName)
|
||||
activeEditorInstance.startInputView(info)
|
||||
themeManager.updateRemoteColorValues(activeEditorInstance.packageName ?: "")
|
||||
eventListeners.toList().forEach {
|
||||
it?.onStartInputView(activeEditorInstance, restarting)
|
||||
}
|
||||
@@ -391,13 +397,12 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
override fun onFinishInputView(finishingInput: Boolean) {
|
||||
flogInfo(LogTopic.IMS_EVENTS) { "finishingInput=$finishingInput" }
|
||||
|
||||
if (finishingInput) {
|
||||
activeEditorInstance = EditorInstance.default()
|
||||
} else {
|
||||
if (!finishingInput) {
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.R) {
|
||||
textInputManager.smartbarView?.clearInlineSuggestions()
|
||||
}
|
||||
}
|
||||
activeEditorInstance.finishInputView()
|
||||
|
||||
super.onFinishInputView(finishingInput)
|
||||
eventListeners.toList().forEach { it?.onFinishInputView(finishingInput) }
|
||||
@@ -407,10 +412,14 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
override fun onFinishInput() {
|
||||
flogInfo(LogTopic.IMS_EVENTS)
|
||||
|
||||
currentInputConnection?.requestCursorUpdates(0)
|
||||
activeEditorInstance.finishInput()
|
||||
super.onFinishInput()
|
||||
}
|
||||
|
||||
/**
 * Forwards the unbind-input callback to the active editor instance. The superclass
 * implementation is not invoked here.
 */
override fun onUnbindInput() {
    activeEditorInstance.unbindInput()
}
|
||||
|
||||
@RequiresApi(Build.VERSION_CODES.R)
|
||||
override fun onCreateInlineSuggestionsRequest(uiExtras: Bundle): InlineSuggestionsRequest? {
|
||||
return if (prefs.smartbar.enabled && prefs.suggestion.api30InlineSuggestionsEnabled) {
|
||||
@@ -490,7 +499,6 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
}
|
||||
isWindowShown = true
|
||||
|
||||
prefs.syncSystemSettings()
|
||||
val newActiveSubtype = subtypeManager.getActiveSubtype() ?: Subtype.DEFAULT
|
||||
if (newActiveSubtype != activeSubtype) {
|
||||
activeSubtype = newActiveSubtype
|
||||
@@ -593,7 +601,7 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
activeState.isSelectionMode = (newSelEnd - newSelStart) != 0
|
||||
if (internalBatchNestingLevel == 0) {
|
||||
flogInfo(LogTopic.IMS_EVENTS) { "onUpdateSelection($oldSelStart, $oldSelEnd, $newSelStart, $newSelEnd, $candidatesStart, $candidatesEnd)" }
|
||||
activeEditorInstance.onUpdateSelection(
|
||||
activeEditorInstance.updateSelection(
|
||||
oldSelStart, oldSelEnd,
|
||||
newSelStart, newSelEnd,
|
||||
candidatesStart, candidatesEnd
|
||||
@@ -725,84 +733,6 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Makes a key press vibration if the user has this feature enabled in the preferences.
 *
 * If both the configured duration and strength are negative, the haptic feedback of the
 * input window view is tried first, provided the system vibration preference is enabled;
 * if that preference is disabled nothing happens at all. Only when the view does not
 * report success is the vibrator triggered manually, with `-1` values replaced by
 * fallback values.
 *
 * @param isMovingGestureEffect If true, a shorter and weaker effect is used.
 */
fun keyPressVibrate(isMovingGestureEffect: Boolean = false) {
    if (!prefs.keyboard.vibrationEnabled) {
        return
    }

    var durationMs = prefs.keyboard.vibrationDuration.toLong()
    var amplitude = prefs.keyboard.vibrationStrength
    val nothingConfigured = durationMs < 0 && amplitude < 0

    if (!prefs.keyboard.vibrationEnabledSystem && nothingConfigured) {
        return
    }

    if (nothingConfigured) {
        val feedbackConstant =
            if (isMovingGestureEffect && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O_MR1) {
                HapticFeedbackConstants.TEXT_HANDLE_MOVE
            } else {
                HapticFeedbackConstants.KEYBOARD_TAP
            }
        // Result is null if there is no view, which counts as "not performed"
        val performed = uiBinding?.inputWindowView?.performHapticFeedback(feedbackConstant)
        if (performed == true) {
            return
        }
    }

    // Resolve the duration to use for the manual vibration
    if (durationMs == -1L) {
        durationMs = 36
    }
    if (isMovingGestureEffect) {
        durationMs = (durationMs / 8.0).toLong().coerceAtLeast(1)
    }

    // Resolve the strength to use for the manual vibration
    if (amplitude == -1) {
        amplitude = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            VibrationEffect.DEFAULT_AMPLITUDE
        } else {
            36
        }
    }
    if (isMovingGestureEffect) {
        amplitude = if (amplitude > 0) {
            (amplitude / 2.0).toInt().coerceAtLeast(1)
        } else {
            8
        }
    }

    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        val effect = VibrationEffect.createOneShot(durationMs, amplitude)
        vibrator?.vibrate(effect)
    } else {
        @Suppress("DEPRECATION")
        vibrator?.vibrate(durationMs)
    }
}
|
||||
|
||||
/**
 * Makes a key press sound if the user has this feature enabled in the preferences.
 *
 * Space, delete and enter keys get their dedicated sound effect, everything else
 * (including a null [keyData]) gets the standard key press effect. A sound volume of
 * `-1` plays the effect without an explicit volume, but only if the system sound
 * preference is enabled; a positive sound volume is passed on as a fraction of 100.
 * If no audio manager is available, nothing is played.
 *
 * @param keyData The key data of the pressed key, used to choose the sound effect.
 */
fun keyPressSound(keyData: KeyData? = null) {
    if (!prefs.keyboard.soundEnabled) {
        return
    }
    // The audio manager field is nullable; skip the sound instead of throwing a
    // NullPointerException when it is not set.
    val manager = audioManager ?: return
    val soundVolume = prefs.keyboard.soundVolume
    val effect = when (keyData) {
        is TextKeyData -> when (keyData.code) {
            KeyCode.SPACE -> AudioManager.FX_KEYPRESS_SPACEBAR
            KeyCode.DELETE -> AudioManager.FX_KEYPRESS_DELETE
            KeyCode.ENTER -> AudioManager.FX_KEYPRESS_RETURN
            else -> AudioManager.FX_KEYPRESS_STANDARD
        }
        else -> AudioManager.FX_KEYPRESS_STANDARD
    }
    if (soundVolume == -1 && prefs.keyboard.soundEnabledSystem) {
        manager.playSoundEffect(effect)
    } else if (soundVolume > 0) {
        manager.playSoundEffect(effect, soundVolume / 100f)
    }
}
|
||||
|
||||
/**
|
||||
* Executes a given [SwipeAction]. Ignores any [SwipeAction] but the ones relevant for this
|
||||
* class.
|
||||
@@ -1053,7 +983,7 @@ open class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardMa
|
||||
|
||||
val tmpSubtypeList = mutableListOf<Pair<String, String>>()
|
||||
for (defaultSubtype in defaultSubtypes) {
|
||||
tmpSubtypeList.add(Pair(defaultSubtype.locale.toString(), defaultSubtype.locale.displayName))
|
||||
tmpSubtypeList.add(Pair(defaultSubtype.locale.localeTag(), defaultSubtype.locale.displayName()))
|
||||
}
|
||||
// Sort language list alphabetically by the display name of a language
|
||||
tmpSubtypeList.sortBy { it.second }
|
||||
|
||||
@@ -20,12 +20,12 @@ import android.os.SystemClock
|
||||
import android.util.SparseArray
|
||||
import androidx.core.util.forEach
|
||||
import androidx.core.util.set
|
||||
import dev.patrickgold.florisboard.BuildConfig
|
||||
import dev.patrickgold.florisboard.debug.LogTopic
|
||||
import dev.patrickgold.florisboard.debug.flogDebug
|
||||
import dev.patrickgold.florisboard.ime.keyboard.KeyData
|
||||
import dev.patrickgold.florisboard.ime.text.key.KeyCode
|
||||
import dev.patrickgold.florisboard.ime.text.keyboard.TextKeyData
|
||||
import kotlinx.coroutines.*
|
||||
import kotlinx.coroutines.channels.Channel
|
||||
import timber.log.Timber
|
||||
|
||||
/**
|
||||
* The main logic point of processing input events and delegating them to the registered event receivers. Currently,
|
||||
@@ -96,9 +96,7 @@ class InputEventDispatcher private constructor(
|
||||
for (ev in channel) {
|
||||
if (!isActive) break
|
||||
val startTime = System.nanoTime()
|
||||
if (BuildConfig.DEBUG) {
|
||||
Timber.d(ev.toString())
|
||||
}
|
||||
flogDebug(LogTopic.KEY_EVENTS) { ev.toString() }
|
||||
when (ev.action) {
|
||||
InputKeyEvent.Action.DOWN -> {
|
||||
if (pressedKeys.indexOfKey(ev.data.code) >= 0) continue
|
||||
@@ -155,9 +153,7 @@ class InputEventDispatcher private constructor(
|
||||
}
|
||||
}
|
||||
}
|
||||
if (BuildConfig.DEBUG) {
|
||||
Timber.d("Time elapsed: ${(System.nanoTime() - startTime) / 1_000_000}")
|
||||
}
|
||||
flogDebug(LogTopic.KEY_EVENTS) { "Time elapsed: ${(System.nanoTime() - startTime) / 1_000_000}" }
|
||||
}
|
||||
pressedKeys.forEach { _, value -> value.repeatKeyPressJob?.cancel() }
|
||||
pressedKeys.clear()
|
||||
@@ -210,7 +206,7 @@ class InputEventDispatcher private constructor(
|
||||
data class InputKeyEvent(
|
||||
val eventTime: Long,
|
||||
val action: Action,
|
||||
val data: TextKeyData,
|
||||
val data: KeyData,
|
||||
val count: Int
|
||||
) {
|
||||
companion object {
|
||||
@@ -221,7 +217,7 @@ data class InputKeyEvent(
|
||||
*
|
||||
* @return The created input key event.
|
||||
*/
|
||||
fun down(keyData: TextKeyData): InputKeyEvent {
|
||||
fun down(keyData: KeyData): InputKeyEvent {
|
||||
return InputKeyEvent(
|
||||
eventTime = SystemClock.uptimeMillis(),
|
||||
action = Action.DOWN,
|
||||
@@ -238,7 +234,7 @@ data class InputKeyEvent(
|
||||
*
|
||||
* @return The created input key event.
|
||||
*/
|
||||
fun downUp(keyData: TextKeyData, count: Int = 1): InputKeyEvent {
|
||||
fun downUp(keyData: KeyData, count: Int = 1): InputKeyEvent {
|
||||
return InputKeyEvent(
|
||||
eventTime = SystemClock.uptimeMillis(),
|
||||
action = Action.DOWN_UP,
|
||||
@@ -254,7 +250,7 @@ data class InputKeyEvent(
|
||||
*
|
||||
* @return The created input key event.
|
||||
*/
|
||||
fun up(keyData: TextKeyData): InputKeyEvent {
|
||||
fun up(keyData: KeyData): InputKeyEvent {
|
||||
return InputKeyEvent(
|
||||
eventTime = SystemClock.uptimeMillis(),
|
||||
action = Action.UP,
|
||||
@@ -271,7 +267,7 @@ data class InputKeyEvent(
|
||||
*
|
||||
* @return The created input key event.
|
||||
*/
|
||||
fun repeat(keyData: TextKeyData, count: Int = 1): InputKeyEvent {
|
||||
fun repeat(keyData: KeyData, count: Int = 1): InputKeyEvent {
|
||||
return InputKeyEvent(
|
||||
eventTime = SystemClock.uptimeMillis(),
|
||||
action = Action.REPEAT,
|
||||
@@ -287,7 +283,7 @@ data class InputKeyEvent(
|
||||
*
|
||||
* @return The created input key event.
|
||||
*/
|
||||
fun cancel(keyData: TextKeyData): InputKeyEvent {
|
||||
fun cancel(keyData: KeyData): InputKeyEvent {
|
||||
return InputKeyEvent(
|
||||
eventTime = SystemClock.uptimeMillis(),
|
||||
action = Action.CANCEL,
|
||||
|
||||
@@ -19,7 +19,6 @@ package dev.patrickgold.florisboard.ime.core
|
||||
import android.content.Context
|
||||
import android.content.SharedPreferences
|
||||
import android.os.Build
|
||||
import android.provider.Settings
|
||||
import androidx.core.os.UserManagerCompat
|
||||
import androidx.preference.PreferenceManager
|
||||
import dev.patrickgold.florisboard.R
|
||||
@@ -57,6 +56,7 @@ class Preferences(
|
||||
val dictionary = Dictionary(this)
|
||||
val gestures = Gestures(this)
|
||||
val glide = Glide(this)
|
||||
val inputFeedback = InputFeedback(this)
|
||||
val internal = Internal(this)
|
||||
val keyboard = Keyboard(this)
|
||||
val localization = Localization(this)
|
||||
@@ -152,21 +152,6 @@ class Preferences(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads the system-wide "sound effects enabled" and "haptic feedback enabled" settings and
 * mirrors them into the keyboard preferences. Does nothing if `applicationContext.get()`
 * returns null.
 */
fun syncSystemSettings() {
    val context = applicationContext.get() ?: return
    val resolver = context.contentResolver

    // Both settings fall back to 0 (= disabled) if the system does not provide a value.
    val soundEffects = Settings.System.getInt(resolver, Settings.System.SOUND_EFFECTS_ENABLED, 0)
    keyboard.soundEnabledSystem = soundEffects != 0

    val hapticFeedback = Settings.System.getInt(resolver, Settings.System.HAPTIC_FEEDBACK_ENABLED, 0)
    keyboard.vibrationEnabledSystem = hapticFeedback != 0
}
|
||||
|
||||
/**
|
||||
* Wrapper class for advanced preferences.
|
||||
*/
|
||||
@@ -217,6 +202,7 @@ class Preferences(
|
||||
companion object {
|
||||
const val ENABLED = "devtools__enabled"
|
||||
const val SHOW_HEAP_MEMORY_STATS = "devtools__show_heap_memory_stats"
|
||||
const val OVERRIDE_WS_MIN_HEAP_RES = "devtools__override_word_suggestions_min_heap_restriction"
|
||||
const val CLEAR_UDM_INTERNAL_DATABASE = "devtools__clear_udm_internal_database"
|
||||
}
|
||||
|
||||
@@ -226,6 +212,9 @@ class Preferences(
|
||||
var showHeapMemoryStats: Boolean
|
||||
get() = prefs.getPref(SHOW_HEAP_MEMORY_STATS, false)
|
||||
set(v) = prefs.setPref(SHOW_HEAP_MEMORY_STATS, v)
|
||||
var overrideWordSuggestionsMinHeapRestriction: Boolean
|
||||
get() = prefs.getPref(OVERRIDE_WS_MIN_HEAP_RES, false)
|
||||
set(v) = prefs.setPref(OVERRIDE_WS_MIN_HEAP_RES, v)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -329,6 +318,90 @@ class Preferences(
|
||||
set(v) = prefs.setPref(PREVIEW_REFRESH_DELAY, v)
|
||||
}
|
||||
|
||||
/**
 * Wrapper class for input feedback preferences, i.e. the audio and haptic feedback settings
 * stored under the `input_feedback__` key prefix. Each property reads from and writes to the
 * backing [Preferences] instance on every access.
 */
class InputFeedback(private val prefs: Preferences) {
    // --- Audio feedback ---

    var audioEnabled: Boolean
        get() = prefs.getPref(AUDIO_ENABLED, true)
        set(value) = prefs.setPref(AUDIO_ENABLED, value)

    var audioIgnoreSystemSettings: Boolean
        get() = prefs.getPref(AUDIO_IGNORE_SYSTEM_SETTINGS, false)
        set(value) = prefs.setPref(AUDIO_IGNORE_SYSTEM_SETTINGS, value)

    var audioVolume: Int
        get() = prefs.getPref(AUDIO_VOLUME, 50)
        set(value) = prefs.setPref(AUDIO_VOLUME, value)

    var audioFeatKeyPress: Boolean
        get() = prefs.getPref(AUDIO_FEAT_KEY_PRESS, true)
        set(value) = prefs.setPref(AUDIO_FEAT_KEY_PRESS, value)

    var audioFeatKeyLongPress: Boolean
        get() = prefs.getPref(AUDIO_FEAT_KEY_LONG_PRESS, false)
        set(value) = prefs.setPref(AUDIO_FEAT_KEY_LONG_PRESS, value)

    var audioFeatKeyRepeatedAction: Boolean
        get() = prefs.getPref(AUDIO_FEAT_KEY_REPEATED_ACTION, false)
        set(value) = prefs.setPref(AUDIO_FEAT_KEY_REPEATED_ACTION, value)

    var audioFeatGestureSwipe: Boolean
        get() = prefs.getPref(AUDIO_FEAT_GESTURE_SWIPE, false)
        set(value) = prefs.setPref(AUDIO_FEAT_GESTURE_SWIPE, value)

    var audioFeatGestureMovingSwipe: Boolean
        get() = prefs.getPref(AUDIO_FEAT_GESTURE_MOVING_SWIPE, false)
        set(value) = prefs.setPref(AUDIO_FEAT_GESTURE_MOVING_SWIPE, value)

    // --- Haptic feedback ---

    var hapticEnabled: Boolean
        get() = prefs.getPref(HAPTIC_ENABLED, true)
        set(value) = prefs.setPref(HAPTIC_ENABLED, value)

    var hapticIgnoreSystemSettings: Boolean
        get() = prefs.getPref(HAPTIC_IGNORE_SYSTEM_SETTINGS, false)
        set(value) = prefs.setPref(HAPTIC_IGNORE_SYSTEM_SETTINGS, value)

    var hapticUseVibrator: Boolean
        get() = prefs.getPref(HAPTIC_USE_VIBRATOR, true)
        set(value) = prefs.setPref(HAPTIC_USE_VIBRATOR, value)

    var hapticVibrationDuration: Int
        get() = prefs.getPref(HAPTIC_VIBRATION_DURATION, 50)
        set(value) = prefs.setPref(HAPTIC_VIBRATION_DURATION, value)

    var hapticVibrationStrength: Int
        get() = prefs.getPref(HAPTIC_VIBRATION_STRENGTH, 50)
        set(value) = prefs.setPref(HAPTIC_VIBRATION_STRENGTH, value)

    var hapticFeatKeyPress: Boolean
        get() = prefs.getPref(HAPTIC_FEAT_KEY_PRESS, true)
        set(value) = prefs.setPref(HAPTIC_FEAT_KEY_PRESS, value)

    var hapticFeatKeyLongPress: Boolean
        get() = prefs.getPref(HAPTIC_FEAT_KEY_LONG_PRESS, false)
        set(value) = prefs.setPref(HAPTIC_FEAT_KEY_LONG_PRESS, value)

    var hapticFeatKeyRepeatedAction: Boolean
        get() = prefs.getPref(HAPTIC_FEAT_KEY_REPEATED_ACTION, true)
        set(value) = prefs.setPref(HAPTIC_FEAT_KEY_REPEATED_ACTION, value)

    var hapticFeatGestureSwipe: Boolean
        get() = prefs.getPref(HAPTIC_FEAT_GESTURE_SWIPE, false)
        set(value) = prefs.setPref(HAPTIC_FEAT_GESTURE_SWIPE, value)

    var hapticFeatGestureMovingSwipe: Boolean
        get() = prefs.getPref(HAPTIC_FEAT_GESTURE_MOVING_SWIPE, true)
        set(value) = prefs.setPref(HAPTIC_FEAT_GESTURE_MOVING_SWIPE, value)

    companion object {
        // Preference keys for the audio feedback settings.
        const val AUDIO_ENABLED = "input_feedback__audio_enabled"
        const val AUDIO_IGNORE_SYSTEM_SETTINGS = "input_feedback__audio_ignore_system_settings"
        const val AUDIO_VOLUME = "input_feedback__audio_volume"
        const val AUDIO_FEAT_KEY_PRESS = "input_feedback__audio_feat_key_press"
        const val AUDIO_FEAT_KEY_LONG_PRESS = "input_feedback__audio_feat_key_long_press"
        const val AUDIO_FEAT_KEY_REPEATED_ACTION = "input_feedback__audio_feat_key_repeated_action"
        const val AUDIO_FEAT_GESTURE_SWIPE = "input_feedback__audio_feat_gesture_swipe"
        const val AUDIO_FEAT_GESTURE_MOVING_SWIPE = "input_feedback__audio_feat_gesture_moving_swipe"

        // Preference keys for the haptic feedback settings.
        const val HAPTIC_ENABLED = "input_feedback__haptic_enabled"
        const val HAPTIC_IGNORE_SYSTEM_SETTINGS = "input_feedback__haptic_ignore_system_settings"
        const val HAPTIC_USE_VIBRATOR = "input_feedback__haptic_use_vibrator"
        const val HAPTIC_VIBRATION_DURATION = "input_feedback__haptic_vibration_duration"
        const val HAPTIC_VIBRATION_STRENGTH = "input_feedback__haptic_vibration_strength"
        const val HAPTIC_FEAT_KEY_PRESS = "input_feedback__haptic_feat_key_press"
        const val HAPTIC_FEAT_KEY_LONG_PRESS = "input_feedback__haptic_feat_key_long_press"
        const val HAPTIC_FEAT_KEY_REPEATED_ACTION = "input_feedback__haptic_feat_key_repeated_action"
        const val HAPTIC_FEAT_GESTURE_SWIPE = "input_feedback__haptic_feat_gesture_swipe"
        const val HAPTIC_FEAT_GESTURE_MOVING_SWIPE = "input_feedback__haptic_feat_gesture_moving_swipe"
    }
}
|
||||
|
||||
/**
|
||||
* Wrapper class for internal preferences. A preference qualifies as an internal pref if the
|
||||
* user has no ability to control this preference's value directly (via a UI pref view).
|
||||
@@ -377,14 +450,9 @@ class Preferences(
|
||||
const val ONE_HANDED_MODE = "keyboard__one_handed_mode"
|
||||
const val ONE_HANDED_MODE_SCALE_FACTOR = "keyboard__one_handed_mode_scale_factor"
|
||||
const val POPUP_ENABLED = "keyboard__popup_enabled"
|
||||
const val SOUND_ENABLED = "keyboard__sound_enabled"
|
||||
const val SOUND_VOLUME = "keyboard__sound_volume"
|
||||
const val SPACE_BAR_SWITCHES_TO_CHARACTERS = "keyboard__space_bar_switches_to_characters"
|
||||
const val UTILITY_KEY_ACTION = "keyboard__utility_key_action"
|
||||
const val UTILITY_KEY_ENABLED = "keyboard__utility_key_enabled"
|
||||
const val VIBRATION_ENABLED = "keyboard__vibration_enabled"
|
||||
const val VIBRATION_DURATION = "keyboard__vibration_duration"
|
||||
const val VIBRATION_STRENGTH = "keyboard__vibration_strength"
|
||||
}
|
||||
|
||||
var bottomOffsetPortrait: Int = 0
|
||||
@@ -438,13 +506,6 @@ class Preferences(
|
||||
var popupEnabled: Boolean = false
|
||||
get() = prefs.getPref(POPUP_ENABLED, true)
|
||||
private set
|
||||
var soundEnabled: Boolean = false
|
||||
get() = prefs.getPref(SOUND_ENABLED, true)
|
||||
private set
|
||||
var soundEnabledSystem: Boolean = false
|
||||
var soundVolume: Int = 0
|
||||
get() = prefs.getPref(SOUND_VOLUME, -1)
|
||||
private set
|
||||
var spaceBarSwitchesToCharacters: Boolean
|
||||
get() = prefs.getPref(SPACE_BAR_SWITCHES_TO_CHARACTERS, true)
|
||||
set(v) = prefs.setPref(SPACE_BAR_SWITCHES_TO_CHARACTERS, v)
|
||||
@@ -454,16 +515,6 @@ class Preferences(
|
||||
var utilityKeyEnabled: Boolean
|
||||
get() = prefs.getPref(UTILITY_KEY_ENABLED, true)
|
||||
set(v) = prefs.setPref(UTILITY_KEY_ENABLED, v)
|
||||
var vibrationEnabled: Boolean = false
|
||||
get() = prefs.getPref(VIBRATION_ENABLED, true)
|
||||
private set
|
||||
var vibrationEnabledSystem: Boolean = false
|
||||
var vibrationDuration: Int = 0
|
||||
get() = prefs.getPref(VIBRATION_DURATION, -1)
|
||||
private set
|
||||
var vibrationStrength: Int = 0
|
||||
get() = prefs.getPref(VIBRATION_STRENGTH, -1)
|
||||
private set
|
||||
|
||||
fun keyHintConfiguration(): KeyHintConfiguration {
|
||||
return KeyHintConfiguration(hintedSymbolsMode, hintedNumberRowMode, mergeHintPopupsEnabled)
|
||||
|
||||
@@ -16,16 +16,10 @@
|
||||
|
||||
package dev.patrickgold.florisboard.ime.core
|
||||
|
||||
import dev.patrickgold.florisboard.common.FlorisLocale
|
||||
import dev.patrickgold.florisboard.ime.text.composing.Appender
|
||||
import dev.patrickgold.florisboard.ime.text.composing.Composer
|
||||
import dev.patrickgold.florisboard.ime.text.layout.LayoutType
|
||||
import dev.patrickgold.florisboard.util.LocaleUtils
|
||||
import kotlinx.serialization.*
|
||||
import kotlinx.serialization.descriptors.PrimitiveKind
|
||||
import kotlinx.serialization.descriptors.PrimitiveSerialDescriptor
|
||||
import kotlinx.serialization.descriptors.SerialDescriptor
|
||||
import kotlinx.serialization.encoding.Decoder
|
||||
import kotlinx.serialization.encoding.Encoder
|
||||
import java.util.*
|
||||
|
||||
/**
|
||||
@@ -40,7 +34,7 @@ import java.util.*
|
||||
*/
|
||||
data class Subtype(
|
||||
val id: Int,
|
||||
val locale: Locale,
|
||||
val locale: FlorisLocale,
|
||||
val composerName: String,
|
||||
val currencySetName: String,
|
||||
val layoutMap: SubtypeLayoutMap,
|
||||
@@ -53,7 +47,7 @@ data class Subtype(
|
||||
*/
|
||||
val DEFAULT = Subtype(
|
||||
id = -1,
|
||||
locale = Locale.ENGLISH,
|
||||
locale = FlorisLocale.ENGLISH,
|
||||
composerName = Appender.name,
|
||||
currencySetName = "\$default",
|
||||
layoutMap = SubtypeLayoutMap(characters = "qwerty")
|
||||
@@ -74,7 +68,7 @@ data class Subtype(
|
||||
val data = str.split("/")
|
||||
when (data.size) {
|
||||
4 -> {
|
||||
val locale = LocaleUtils.stringToLocale(data[1])
|
||||
val locale = FlorisLocale.fromTag(data[1])
|
||||
return Subtype(
|
||||
data[0].toInt(),
|
||||
locale,
|
||||
@@ -84,7 +78,7 @@ data class Subtype(
|
||||
)
|
||||
}
|
||||
5 -> {
|
||||
val locale = LocaleUtils.stringToLocale(data[1])
|
||||
val locale = FlorisLocale.fromTag(data[1])
|
||||
return Subtype(
|
||||
data[0].toInt(),
|
||||
locale,
|
||||
@@ -114,7 +108,7 @@ data class Subtype(
|
||||
* <id>/<language_tag>/<composer_name>/<currency_set_name>/<layout_map>
|
||||
*/
|
||||
override fun toString(): String {
|
||||
val languageTag = locale.toLanguageTag()
|
||||
val languageTag = locale.languageTag()
|
||||
return "$id/$languageTag/$composerName/$currencySetName/$layoutMap"
|
||||
}
|
||||
|
||||
@@ -123,7 +117,7 @@ data class Subtype(
|
||||
* <id>/<language_tag>/<currency_set_name>
|
||||
*/
|
||||
fun toShortString(): String {
|
||||
val languageTag = locale.toLanguageTag()
|
||||
val languageTag = locale.languageTag()
|
||||
return "$id/$languageTag/$currencySetName"
|
||||
}
|
||||
|
||||
@@ -326,24 +320,12 @@ data class SubtypeLayoutMap(
|
||||
@Serializable
|
||||
data class DefaultSubtype(
|
||||
var id: Int,
|
||||
@Serializable(with = LocaleSerializer::class)
|
||||
@Serializable(with = FlorisLocale.Serializer::class)
|
||||
@SerialName("languageTag")
|
||||
var locale: Locale,
|
||||
var locale: FlorisLocale,
|
||||
@SerialName("composer")
|
||||
var composerName: String,
|
||||
@SerialName("currencySet")
|
||||
var currencySetName: String,
|
||||
var preferred: SubtypeLayoutMap
|
||||
)
|
||||
|
||||
/**
 * Serializer which encodes a [Locale] as the string returned by its `toString()` and decodes
 * such a string again through `LocaleUtils.stringToLocale`.
 */
class LocaleSerializer : KSerializer<Locale> {
    override val descriptor: SerialDescriptor = PrimitiveSerialDescriptor("Locale", PrimitiveKind.STRING)

    override fun serialize(encoder: Encoder, value: Locale) {
        val encoded = value.toString()
        encoder.encodeString(encoded)
    }

    override fun deserialize(decoder: Decoder): Locale = LocaleUtils.stringToLocale(decoder.decodeString())
}
|
||||
|
||||
@@ -17,14 +17,13 @@
|
||||
package dev.patrickgold.florisboard.ime.core
|
||||
|
||||
import android.content.Context
|
||||
import dev.patrickgold.florisboard.common.FlorisLocale
|
||||
import dev.patrickgold.florisboard.debug.*
|
||||
import dev.patrickgold.florisboard.res.AssetManager
|
||||
import dev.patrickgold.florisboard.res.AssetRef
|
||||
import dev.patrickgold.florisboard.res.AssetSource
|
||||
import dev.patrickgold.florisboard.ime.text.key.CurrencySet
|
||||
import dev.patrickgold.florisboard.res.FlorisRef
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.MainScope
|
||||
import java.util.*
|
||||
import kotlin.collections.ArrayList
|
||||
|
||||
/**
|
||||
@@ -85,7 +84,7 @@ class SubtypeManager(
|
||||
* @return The [FlorisBoard.ImeConfig] or a default config.
|
||||
*/
|
||||
private fun loadImeConfig(path: String): FlorisBoard.ImeConfig {
|
||||
return assetManager.loadJsonAsset<FlorisBoard.ImeConfig>(AssetRef(AssetSource.Assets, path)).getOrElse {
|
||||
return assetManager.loadJsonAsset<FlorisBoard.ImeConfig>(FlorisRef.assets(path)).getOrElse {
|
||||
flogError(LogTopic.SUBTYPE_MANAGER) { "Failed to retrieve IME config: $it" }
|
||||
FlorisBoard.ImeConfig(packageName)
|
||||
}
|
||||
@@ -118,7 +117,7 @@ class SubtypeManager(
|
||||
* @return True if the subtype was added, false otherwise. A return value of false indicates
|
||||
* that the subtype already exists.
|
||||
*/
|
||||
fun addSubtype(locale: Locale, composerName: String, currencySetName: String, layoutMap: SubtypeLayoutMap): Boolean {
|
||||
fun addSubtype(locale: FlorisLocale, composerName: String, currencySetName: String, layoutMap: SubtypeLayoutMap): Boolean {
|
||||
return addSubtype(
|
||||
Subtype(
|
||||
(locale.hashCode() + 31 * layoutMap.hashCode() + 31 * currencySetName.hashCode()),
|
||||
@@ -186,7 +185,7 @@ class SubtypeManager(
|
||||
* @return The default subtype for the given locale or null, if no matching default system subtype could be
|
||||
* found.
|
||||
*/
|
||||
fun getDefaultSubtypeForLocale(locale: Locale): DefaultSubtype? {
|
||||
fun getDefaultSubtypeForLocale(locale: FlorisLocale): DefaultSubtype? {
|
||||
for (defaultSubtype in imeConfig.defaultSubtypes) {
|
||||
if (defaultSubtype.locale == locale) {
|
||||
return defaultSubtype
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.core
|
||||
|
||||
import dev.patrickgold.florisboard.common.FlorisLocale
|
||||
|
||||
/**
 * Regex-based helper for detecting words in a piece of text.
 */
@Suppress("RegExpRedundantEscape")
object TextProcessor {
    private val LATIN_BASIC_WORD_REGEX = """[_]*(([\p{L}\d\']+[_-]*[\p{L}\d\']+)|[\p{L}\d\']+)[_]*""".toRegex()

    /**
     * Returns the word regex to use for [locale]. Currently every locale maps to the basic Latin
     * word regex; the `when` is kept as the place to add locale-specific expressions.
     */
    private fun wordRegexFor(locale: FlorisLocale): Regex {
        return when (locale) {
            else -> LATIN_BASIC_WORD_REGEX
        }
    }

    /**
     * Detects all words in [text].
     *
     * @param text The text to search for words.
     * @param locale The locale used to select the word regex.
     *
     * @return A lazily evaluated sequence of the index ranges of all words found in [text].
     */
    fun detectWords(text: CharSequence, locale: FlorisLocale): Sequence<IntRange> {
        val regex = wordRegexFor(locale)
        return regex.findAll(text).map { it.range }
    }

    /**
     * Detects all words in the part of [text] between [start] and [end] (both inclusive).
     * Out-of-bounds values are clamped to the valid index range of [text]; an empty or inverted
     * range yields an empty sequence.
     *
     * NOTE(review): the returned ranges are relative to the extracted sub-sequence (index 0 is
     * the clamped [start]), not to [text] itself. Confirm that callers expect this.
     *
     * @param text The text to search for words.
     * @param start The index of the first character to consider.
     * @param end The index of the last character to consider.
     * @param locale The locale used to select the word regex.
     *
     * @return A lazily evaluated sequence of the index ranges of all words found.
     */
    fun detectWords(text: CharSequence, start: Int, end: Int, locale: FlorisLocale): Sequence<IntRange> {
        val regex = wordRegexFor(locale)
        val tStart = start.coerceAtLeast(0)
        // Clamp to the last valid index: the range below is inclusive, so clamping to
        // `text.length` caused an IndexOutOfBoundsException for `end >= text.length`.
        val tEnd = end.coerceAtMost(text.lastIndex)
        if (tStart > tEnd) return emptySequence()
        return regex.findAll(text.subSequence(tStart, tEnd + 1)).map { it.range }
    }

    /**
     * Checks if the whole [text] is exactly one word according to the word regex for [locale].
     *
     * @return True if [text] matches the word regex entirely, false otherwise.
     */
    fun isWord(text: CharSequence, locale: FlorisLocale): Boolean {
        val regex = wordRegexFor(locale)
        return regex.matches(text)
    }
}
|
||||
@@ -18,15 +18,16 @@ package dev.patrickgold.florisboard.ime.dictionary
|
||||
|
||||
import android.content.Context
|
||||
import androidx.room.Room
|
||||
import dev.patrickgold.florisboard.ime.core.FlorisBoard
|
||||
import dev.patrickgold.florisboard.common.FlorisLocale
|
||||
import dev.patrickgold.florisboard.debug.flogError
|
||||
import dev.patrickgold.florisboard.ime.core.Preferences
|
||||
import dev.patrickgold.florisboard.ime.core.Subtype
|
||||
import dev.patrickgold.florisboard.ime.nlp.SuggestionList
|
||||
import dev.patrickgold.florisboard.ime.nlp.Word
|
||||
import dev.patrickgold.florisboard.res.FlorisRef
|
||||
import kotlinx.coroutines.CoroutineDispatcher
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import java.lang.ref.WeakReference
|
||||
import java.util.*
|
||||
|
||||
/**
|
||||
* TODO: document
|
||||
@@ -44,6 +45,8 @@ class DictionaryManager private constructor(
|
||||
private var systemUserDictionaryDatabase: SystemUserDictionaryDatabase? = null
|
||||
|
||||
companion object {
|
||||
val FLORIS_EN_REF = FlorisRef.assets("ime/dict/en.flict")
|
||||
|
||||
private var defaultInstance: DictionaryManager? = null
|
||||
|
||||
fun init(applicationContext: Context): DictionaryManager {
|
||||
@@ -74,15 +77,39 @@ class DictionaryManager private constructor(
|
||||
) {
|
||||
val suggestions = SuggestionList.new(maxSuggestionCount)
|
||||
queryUserDictionary(currentWord, subtype.locale, suggestions)
|
||||
loadDictionary(FLORIS_EN_REF).onSuccess {
|
||||
it.getTokenPredictions(preceidingWords, currentWord, maxSuggestionCount, allowPossiblyOffensive, suggestions)
|
||||
}
|
||||
block(suggestions)
|
||||
suggestions.dispose()
|
||||
}
|
||||
|
||||
fun prepareDictionaries(subtype: Subtype) {
|
||||
// TODO: Implement this
|
||||
/**
 * Loads the dictionary referenced by [ref], using the dictionary cache if possible.
 *
 * A successfully loaded dictionary is stored in the cache under `ref.toString()`, so subsequent
 * calls for the same reference return the cached instance. Currently only references whose
 * relative path ends with ".flict" are supported.
 *
 * @param ref The reference to the dictionary to load.
 *
 * @return A result containing the dictionary, or a failure if the type of the reference is not
 *  supported, the application context is not available or the dictionary could not be loaded.
 */
fun loadDictionary(ref: FlorisRef): Result<Dictionary> {
    val cacheKey = ref.toString()
    dictionaryCache[cacheKey]?.let { cached ->
        return Result.success(cached)
    }
    if (!ref.relativePath.endsWith(".flict")) {
        return Result.failure(Exception("Unable to determine supported type for given FlorisRef!"))
    }
    // Assume this is a Flictionary. Without a context the asset cannot be opened, so report
    // this explicitly instead of falling through to a generic failure.
    val context = applicationContext.get()
        ?: return Result.failure(Exception("Unable to load dictionary: application context is not available!"))
    return Flictionary.load(context, ref)
        .onSuccess { flict -> dictionaryCache[cacheKey] = flict }
        .onFailure { err -> flogError { err.toString() } }
}
|
||||
|
||||
fun queryUserDictionary(word: Word, locale: Locale, destSuggestionList: SuggestionList) {
|
||||
/**
 * Prepares the dictionaries by loading the dictionary referenced by [FLORIS_EN_REF]. The
 * result of the load call is discarded.
 *
 * @param subtype Not used by the current implementation.
 */
fun prepareDictionaries(subtype: Subtype) {
|
||||
loadDictionary(FLORIS_EN_REF)
|
||||
}
|
||||
|
||||
fun queryUserDictionary(word: Word, locale: FlorisLocale, destSuggestionList: SuggestionList) {
|
||||
val florisDao = florisUserDictionaryDao()
|
||||
val systemDao = systemUserDictionaryDao()
|
||||
if (florisDao == null && systemDao == null) {
|
||||
@@ -114,7 +141,7 @@ class DictionaryManager private constructor(
|
||||
}
|
||||
}
|
||||
|
||||
fun spell(word: Word, locale: Locale): Boolean {
|
||||
fun spell(word: Word, locale: FlorisLocale): Boolean {
|
||||
val florisDao = florisUserDictionaryDao()
|
||||
val systemDao = systemUserDictionaryDao()
|
||||
if (florisDao == null && systemDao == null) {
|
||||
|
||||
@@ -16,7 +16,10 @@
|
||||
|
||||
package dev.patrickgold.florisboard.ime.dictionary
|
||||
|
||||
import android.content.Context
|
||||
import dev.patrickgold.florisboard.ime.nlp.*
|
||||
import dev.patrickgold.florisboard.res.FlorisRef
|
||||
import java.io.InputStream
|
||||
|
||||
/**
|
||||
* Class Flictionary which takes care of loading the binary asset as well as providing words for
|
||||
@@ -25,14 +28,14 @@ import dev.patrickgold.florisboard.ime.nlp.*
|
||||
* This class accepts binary dictionary files of the type "flict" as defined in here:
|
||||
* https://github.com/florisboard/dictionary-tools/blob/main/flictionary.md
|
||||
*/
|
||||
/**
|
||||
class Flictionary private constructor(
|
||||
override val name: String,
|
||||
override val label: String,
|
||||
override val authors: List<String>,
|
||||
private val date: Long,
|
||||
private val version: Int,
|
||||
private val headerStr: String
|
||||
private val headerStr: String,
|
||||
private val languageModel: FlorisLanguageModel
|
||||
) : Dictionary {
|
||||
companion object {
|
||||
private const val VERSION_0 = 0x0
|
||||
@@ -63,11 +66,11 @@ class Flictionary private constructor(
|
||||
* either the parsed dictionary or an exception giving information about the error which
|
||||
* occurred.
|
||||
*/
|
||||
fun load(context: Context, assetRef: AssetRef): Result<Flictionary> {
|
||||
fun load(context: Context, assetRef: FlorisRef): Result<Flictionary> {
|
||||
val buffer = ByteArray(5000) { 0 }
|
||||
val inputStream: InputStream
|
||||
if (assetRef.source == AssetSource.Assets) {
|
||||
inputStream = context.assets.open(assetRef.path)
|
||||
if (assetRef.isAssets) {
|
||||
inputStream = context.assets.open(assetRef.relativePath)
|
||||
} else {
|
||||
return Result.failure(Exception("Only AssetSource.Assets is currently supported!"))
|
||||
}
|
||||
@@ -291,26 +294,25 @@ class Flictionary private constructor(
|
||||
|
||||
// TODO: preceding tokens are currently ignored
|
||||
override fun getTokenPredictions(
|
||||
precedingTokens: List<Token<String>>,
|
||||
currentToken: Token<String>?,
|
||||
precedingTokens: List<Word>,
|
||||
currentToken: Word?,
|
||||
maxSuggestionCount: Int,
|
||||
allowPossiblyOffensive: Boolean
|
||||
): List<WeightedToken<String, Int>> {
|
||||
currentToken ?: return listOf()
|
||||
allowPossiblyOffensive: Boolean,
|
||||
destSuggestionList: SuggestionList
|
||||
) {
|
||||
currentToken ?: return
|
||||
|
||||
return if (currentToken.data.isNotEmpty()) {
|
||||
if (currentToken.isNotBlank()) {
|
||||
val retList = languageModel.matchAllNgrams(
|
||||
ngram = Ngram(
|
||||
_tokens = listOf(Token(currentToken.data.lowercase())),
|
||||
_tokens = listOf(Token(currentToken.lowercase())),
|
||||
_freq = -1
|
||||
),
|
||||
maxEditDistance = 2,
|
||||
maxTokenCount = maxSuggestionCount,
|
||||
allowPossiblyOffensive = allowPossiblyOffensive
|
||||
)
|
||||
retList
|
||||
} else {
|
||||
listOf()
|
||||
retList.forEach { destSuggestionList.add(it.data, 128) }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -422,4 +424,3 @@ fun InputStream.readNext(b: ByteArray, off: Int, len: Int): Int {
|
||||
}
|
||||
return lenRead
|
||||
}
|
||||
*/
|
||||
|
||||
@@ -33,10 +33,9 @@ import androidx.room.RoomDatabase
|
||||
import androidx.room.TypeConverter
|
||||
import androidx.room.TypeConverters
|
||||
import androidx.room.Update
|
||||
import dev.patrickgold.florisboard.common.FlorisLocale
|
||||
import dev.patrickgold.florisboard.res.ExternalContentUtils
|
||||
import dev.patrickgold.florisboard.util.LocaleUtils
|
||||
import java.lang.ref.WeakReference
|
||||
import java.util.*
|
||||
|
||||
private const val WORDS_TABLE = "words"
|
||||
|
||||
@@ -89,31 +88,31 @@ interface UserDictionaryDao {
|
||||
fun query(word: String): List<UserDictionaryEntry>
|
||||
|
||||
@Query("$SELECT_ALL_FROM_WORDS WHERE ${UserDictionary.Words.WORD} LIKE '%' || :word || '%' AND $LOCALE_MATCHES")
|
||||
fun query(word: String, locale: Locale?): List<UserDictionaryEntry>
|
||||
fun query(word: String, locale: FlorisLocale?): List<UserDictionaryEntry>
|
||||
|
||||
@Query("$SELECT_ALL_FROM_WORDS WHERE ${UserDictionary.Words.SHORTCUT} = :shortcut")
|
||||
fun queryShortcut(shortcut: String): List<UserDictionaryEntry>
|
||||
|
||||
@Query("$SELECT_ALL_FROM_WORDS WHERE ${UserDictionary.Words.SHORTCUT} = :shortcut AND $LOCALE_MATCHES")
|
||||
fun queryShortcut(shortcut: String, locale: Locale?): List<UserDictionaryEntry>
|
||||
fun queryShortcut(shortcut: String, locale: FlorisLocale?): List<UserDictionaryEntry>
|
||||
|
||||
@Query(SELECT_ALL_FROM_WORDS)
|
||||
fun queryAll(): List<UserDictionaryEntry>
|
||||
|
||||
@Query("$SELECT_ALL_FROM_WORDS WHERE (${UserDictionary.Words.LOCALE} = :locale AND :locale IS NOT NULL) OR (${UserDictionary.Words.LOCALE} IS NULL AND :locale IS NULL)")
|
||||
fun queryAll(locale: Locale?): List<UserDictionaryEntry>
|
||||
fun queryAll(locale: FlorisLocale?): List<UserDictionaryEntry>
|
||||
|
||||
@Query("$SELECT_ALL_FROM_WORDS WHERE ${UserDictionary.Words.WORD} = :word")
|
||||
fun queryExact(word: String): List<UserDictionaryEntry>
|
||||
|
||||
@Query("$SELECT_ALL_FROM_WORDS WHERE ${UserDictionary.Words.WORD} = :word AND (${UserDictionary.Words.LOCALE} = :locale OR (${UserDictionary.Words.LOCALE} IS NULL AND :locale IS NULL))")
|
||||
fun queryExact(word: String, locale: Locale?): List<UserDictionaryEntry>
|
||||
fun queryExact(word: String, locale: FlorisLocale?): List<UserDictionaryEntry>
|
||||
|
||||
@Query("$SELECT_ALL_FROM_WORDS WHERE ${UserDictionary.Words.WORD} = :word AND $LOCALE_MATCHES")
|
||||
fun queryExactFuzzyLocale(word: String, locale: Locale?): List<UserDictionaryEntry>
|
||||
fun queryExactFuzzyLocale(word: String, locale: FlorisLocale?): List<UserDictionaryEntry>
|
||||
|
||||
@Query("SELECT DISTINCT ${UserDictionary.Words.LOCALE} FROM $WORDS_TABLE")
|
||||
fun queryLanguageList(): List<Locale?>
|
||||
fun queryLanguageList(): List<FlorisLocale?>
|
||||
|
||||
@Insert
|
||||
fun insert(entry: UserDictionaryEntry)
|
||||
@@ -165,7 +164,7 @@ interface UserDictionaryDatabase {
|
||||
}
|
||||
if (word != null && freq != null) {
|
||||
val alreadyExistingEntries = userDictionaryDao().queryExact(
|
||||
word!!, locale?.let { LocaleUtils.stringToLocale(it) }
|
||||
word!!, locale?.let { FlorisLocale.fromTag(it) }
|
||||
)
|
||||
if (alreadyExistingEntries.isNotEmpty()) {
|
||||
userDictionaryDao().update(UserDictionaryEntry(alreadyExistingEntries[0].id, word!!, freq!!, locale, shortcut))
|
||||
@@ -226,18 +225,18 @@ abstract class FlorisUserDictionaryDatabase : RoomDatabase(), UserDictionaryData
|
||||
|
||||
class Converters {
|
||||
@TypeConverter
|
||||
fun localeToString(locale: Locale?): String? {
|
||||
fun localeToString(locale: FlorisLocale?): String? {
|
||||
return when (locale) {
|
||||
null -> null
|
||||
else -> locale.toString()
|
||||
else -> locale.localeTag()
|
||||
}
|
||||
}
|
||||
|
||||
@TypeConverter
|
||||
fun stringToLocale(string: String?): Locale? {
|
||||
fun stringToLocale(string: String?): FlorisLocale? {
|
||||
return when (string) {
|
||||
null, "all", "null", "" -> null
|
||||
else -> LocaleUtils.stringToLocale(string)
|
||||
else -> FlorisLocale.fromTag(string)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -255,7 +254,7 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
)
|
||||
}
|
||||
|
||||
override fun query(word: String, locale: Locale?): List<UserDictionaryEntry> {
|
||||
override fun query(word: String, locale: FlorisLocale?): List<UserDictionaryEntry> {
|
||||
return if (locale == null) {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.WORD} LIKE ? AND ${UserDictionary.Words.LOCALE} IS NULL",
|
||||
@@ -265,7 +264,7 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
} else {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.WORD} LIKE ? AND (${UserDictionary.Words.LOCALE} = ? OR ${UserDictionary.Words.LOCALE} = ? OR ${UserDictionary.Words.LOCALE} IS NULL)",
|
||||
selectionArgs = arrayOf("%$word%", locale.toString(), locale.language.toString()),
|
||||
selectionArgs = arrayOf("%$word%", locale.localeTag(), locale.language),
|
||||
sortOrder = SORT_BY_FREQ_DESC,
|
||||
)
|
||||
}
|
||||
@@ -279,7 +278,7 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
)
|
||||
}
|
||||
|
||||
override fun queryShortcut(shortcut: String, locale: Locale?): List<UserDictionaryEntry> {
|
||||
override fun queryShortcut(shortcut: String, locale: FlorisLocale?): List<UserDictionaryEntry> {
|
||||
return if (locale == null) {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.SHORTCUT} = ? AND ${UserDictionary.Words.LOCALE} IS NULL",
|
||||
@@ -289,7 +288,7 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
} else {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.SHORTCUT} = ? AND (${UserDictionary.Words.LOCALE} = ? OR ${UserDictionary.Words.LOCALE} = ? OR ${UserDictionary.Words.LOCALE} IS NULL)",
|
||||
selectionArgs = arrayOf(shortcut, locale.toString(), locale.language.toString()),
|
||||
selectionArgs = arrayOf(shortcut, locale.localeTag(), locale.language),
|
||||
sortOrder = SORT_BY_FREQ_DESC,
|
||||
)
|
||||
}
|
||||
@@ -303,7 +302,7 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
)
|
||||
}
|
||||
|
||||
override fun queryAll(locale: Locale?): List<UserDictionaryEntry> {
|
||||
override fun queryAll(locale: FlorisLocale?): List<UserDictionaryEntry> {
|
||||
return if (locale == null) {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.LOCALE} IS NULL",
|
||||
@@ -313,7 +312,7 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
} else {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.LOCALE} = ?",
|
||||
selectionArgs = arrayOf(locale.toString()),
|
||||
selectionArgs = arrayOf(locale.localeTag()),
|
||||
sortOrder = SORT_BY_FREQ_DESC,
|
||||
)
|
||||
}
|
||||
@@ -327,7 +326,7 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
)
|
||||
}
|
||||
|
||||
override fun queryExact(word: String, locale: Locale?): List<UserDictionaryEntry> {
|
||||
override fun queryExact(word: String, locale: FlorisLocale?): List<UserDictionaryEntry> {
|
||||
return if (locale == null) {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.WORD} = ? AND ${UserDictionary.Words.LOCALE} IS NULL",
|
||||
@@ -337,13 +336,13 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
} else {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.WORD} = ? AND ${UserDictionary.Words.LOCALE} = ?",
|
||||
selectionArgs = arrayOf(word, locale.toString()),
|
||||
selectionArgs = arrayOf(word, locale.localeTag()),
|
||||
sortOrder = SORT_BY_FREQ_DESC,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
override fun queryExactFuzzyLocale(word: String, locale: Locale?): List<UserDictionaryEntry> {
|
||||
override fun queryExactFuzzyLocale(word: String, locale: FlorisLocale?): List<UserDictionaryEntry> {
|
||||
return if (locale == null) {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.WORD} = ? AND ${UserDictionary.Words.LOCALE} IS NULL",
|
||||
@@ -353,13 +352,13 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
} else {
|
||||
queryResolver(
|
||||
selection = "${UserDictionary.Words.WORD} = ? AND (${UserDictionary.Words.LOCALE} = ? OR ${UserDictionary.Words.LOCALE} IS NULL)",
|
||||
selectionArgs = arrayOf(word, locale.toString()),
|
||||
selectionArgs = arrayOf(word, locale.localeTag()),
|
||||
sortOrder = SORT_BY_FREQ_DESC,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
override fun queryLanguageList(): List<Locale?> {
|
||||
override fun queryLanguageList(): List<FlorisLocale?> {
|
||||
val resolver = applicationContext.get()?.contentResolver ?: return listOf()
|
||||
val cursor = resolver.query(
|
||||
UserDictionary.Words.CONTENT_URI,
|
||||
@@ -372,13 +371,13 @@ class SystemUserDictionaryDatabase(context: Context) : UserDictionaryDatabase {
|
||||
return listOf()
|
||||
}
|
||||
val localeIndex = cursor.getColumnIndex(UserDictionary.Words.LOCALE)
|
||||
val retList = mutableSetOf<Locale?>()
|
||||
val retList = mutableSetOf<FlorisLocale?>()
|
||||
while (cursor.moveToNext()) {
|
||||
val localeStr = cursor.getString(localeIndex)
|
||||
if (localeStr == null) {
|
||||
retList.add(null)
|
||||
} else {
|
||||
retList.add(LocaleUtils.stringToLocale(localeStr))
|
||||
retList.add(FlorisLocale.fromTag(localeStr))
|
||||
}
|
||||
}
|
||||
cursor.close()
|
||||
|
||||
@@ -0,0 +1,441 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.dictionary
|
||||
|
||||
import dev.patrickgold.florisboard.ime.nlp.SuggestionList
|
||||
|
||||
/*
|
||||
* ====================== IMPORTANT ========================
|
||||
*
|
||||
* All code in this file is only temporarily added back in so the stable track has suggestions again.
|
||||
* In 0.3.15 a renewed suggestion algorithm will be built and this mess will be removed!
|
||||
*
|
||||
* ==========================================================
|
||||
*/
|
||||
|
||||
/**
 * An ordered, non-empty sequence of [Token]s together with a frequency value.
 *
 * @param _tokens The tokens making up this n-gram; must contain at least one element.
 * @param _freq The frequency value associated with this n-gram.
 * @throws Exception if [_tokens] is empty.
 */
open class Ngram<T : Any, F : Comparable<F>>(_tokens: List<Token<T>>, _freq: F) {
    /** The tokens of this n-gram. Its size always equals [order]. */
    val tokens: List<Token<T>> = _tokens

    /** The frequency value of this n-gram. */
    val freq: F = _freq

    /** The order of this n-gram (1, 2, 3, ...), i.e. the number of [tokens]. */
    val order: Int
        get() = tokens.size

    init {
        // ORDER_UNIGRAM is 1, so "fewer than one token" is the same as "empty".
        if (_tokens.isEmpty()) {
            throw Exception("A n-gram must contain at least 1 token!")
        }
    }

    companion object {
        /** Order of an n-gram consisting of a single token. */
        const val ORDER_UNIGRAM: Int = 1

        /** Order of an n-gram consisting of two tokens. */
        const val ORDER_BIGRAM: Int = 2

        /** Order of an n-gram consisting of three tokens. */
        const val ORDER_TRIGRAM: Int = 3
    }
}
|
||||
|
||||
/**
 * A single token as used in [Ngram], wrapping a payload of type [T].
 *
 * Two tokens are equal when they are of the exact same class and carry equal [data]; the hash
 * code is the hash code of [data].
 */
open class Token<T : Any>(_data: T) {
    /** The payload carried by this token. */
    val data: T = _data

    override fun equals(other: Any?): Boolean = when {
        this === other -> true
        // Covers both `other == null` and "other is of a different class".
        other == null || javaClass != other.javaClass -> false
        else -> data == (other as Token<*>).data
    }

    override fun hashCode(): Int = data.hashCode()

    override fun toString(): String = "Token(\"$data\")"
}
|
||||
|
||||
/**
 * Unwraps every token of this list, returning the [CharSequence] payloads in the same order.
 */
fun List<Token<CharSequence>>.toCharSequenceList(): List<CharSequence> = map { token -> token.data }
|
||||
|
||||
/**
 * Unwraps every token of this list, returning the [String] payloads in the same order.
 */
fun List<Token<String>>.toStringList(): List<String> = map { token -> token.data }
|
||||
|
||||
/**
 * Read-only contract of a language model storing n-grams of order one or higher.
 *
 * @param T The type of data carried by the tokens of the stored n-grams.
 * @param F The type used for n-gram frequencies.
 */
interface LanguageModel<T : Any, F : Comparable<F>> {
    /**
     * Looks up the n-gram made of the passed [tokens].
     *
     * @throws NullPointerException if no matching n-gram exists.
     */
    @Throws(NullPointerException::class)
    fun getNgram(vararg tokens: T): Ngram<T, F>

    /**
     * Looks up the n-gram whose tokens match those of [ngram]. The frequency of [ngram] is
     * ignored while searching.
     *
     * @throws NullPointerException if no matching n-gram exists.
     */
    @Throws(NullPointerException::class)
    fun getNgram(ngram: Ngram<T, F>): Ngram<T, F>

    /**
     * Looks up the n-gram made of the passed [tokens], returning null if there is no match.
     */
    fun getNgramOrNull(vararg tokens: T): Ngram<T, F>?

    /**
     * Looks up the n-gram whose tokens match those of [ngram], returning null if there is no
     * match. The frequency of [ngram] is ignored while searching.
     */
    fun getNgramOrNull(ngram: Ngram<T, F>): Ngram<T, F>?

    /**
     * Checks whether [ngram] exists within this model. If [doMatchFreq] is true, the frequency
     * must match as well.
     */
    fun hasNgram(ngram: Ngram<T, F>, doMatchFreq: Boolean = false): Boolean

    /**
     * Collects all tokens matching the given [ngram], where the last token of the n-gram is
     * used to search for predictions.
     */
    fun matchAllNgrams(
        ngram: Ngram<T, F>,
        maxEditDistance: Int,
        maxTokenCount: Int,
        allowPossiblyOffensive: Boolean
    ): List<Token<T>>
}
|
||||
|
||||
/**
 * A [LanguageModel] which additionally declares operations to insert, update and delete n-grams.
 */
interface MutableLanguageModel<T : Any, F : Comparable<F>> : LanguageModel<T, F> {
    /** Inserts the given [ngram] into this model. */
    fun insertNgram(ngram: Ngram<T, F>)

    /** Updates the given [ngram] within this model. */
    fun updateNgram(ngram: Ngram<T, F>)

    /** Deletes the given [ngram] from this model. */
    fun deleteNgram(ngram: Ngram<T, F>)
}
|
||||
|
||||
/**
 * The root node of an n-gram tree. It is an [NgramNode] of order 0 with the placeholder
 * character '?' and a frequency of -1.
 */
open class NgramTree(
    sameOrderChildren: MutableList<NgramNode> = mutableListOf(),
    higherOrderChildren: MutableList<NgramNode> = mutableListOf()
) : NgramNode(
    order = 0,
    char = '?',
    freq = -1,
    sameOrderChildren = sameOrderChildren,
    higherOrderChildren = higherOrderChildren
)
|
||||
|
||||
/**
 * A node of an n-gram tree. Each node stores the character it represents, a frequency value and
 * two child lists: [sameOrderChildren] continue the current word with its next character, while
 * [higherOrderChildren] hold the first characters of words following the current one.
 *
 * The [freq] value also encodes the kind of node, see [isCharacter], [isWord], [isWordFiller]
 * and [isPossiblyOffensive].
 */
open class NgramNode(
    val order: Int,
    val char: Char,
    val freq: Int,
    val sameOrderChildren: MutableList<NgramNode> = mutableListOf(),
    val higherOrderChildren: MutableList<NgramNode> = mutableListOf()
) {
    /** True if [freq] equals [FREQ_CHARACTER]. */
    val isCharacter: Boolean
        get() = freq == FREQ_CHARACTER

    /** True if [freq] lies within [FREQ_WORD_MIN] and [FREQ_WORD_MAX] (both inclusive). */
    val isWord: Boolean
        get() = freq >= FREQ_WORD_MIN && freq <= FREQ_WORD_MAX

    /** True if [freq] equals [FREQ_WORD_FILLER]. */
    val isWordFiller: Boolean
        get() = freq == FREQ_WORD_FILLER

    /** True if [freq] equals [FREQ_IS_POSSIBLY_OFFENSIVE]. */
    val isPossiblyOffensive: Boolean
        get() = freq == FREQ_IS_POSSIBLY_OFFENSIVE

    /**
     * Walks down the tree along the characters of [word]. The first character is searched in
     * the [higherOrderChildren] of this node, every following character in the
     * [sameOrderChildren] of the previously matched node.
     *
     * @return The node reached by the last character if it is a word or word filler node,
     *  null otherwise or if any character could not be matched.
     */
    fun findWord(word: String): NgramNode? {
        var cursor: NgramNode = this
        for (index in word.indices) {
            val wanted = word[index]
            val candidates = if (index == 0) cursor.higherOrderChildren else cursor.sameOrderChildren
            cursor = candidates.firstOrNull { it.char == wanted } ?: return null
        }
        return cursor.takeIf { it.isWord || it.isWordFiller }
    }

    /**
     * Searches the tree for words similar to [input], allowing up to [maxEditDistance] edits,
     * and adds every match to [list]. The frequency passed to [list] is this node's [freq]
     * shifted right by the number of edits spent so far.
     *
     * [word] is the scratch buffer holding the characters of the path walked so far. This node
     * appends its own [char] on entry and removes it again before returning.
     *
     * Callers start the search with the default cost and [pos] arguments. A [pos] of -1 marks
     * the initial call; the remaining parameters are bookkeeping for the recursion.
     */
    fun listSimilarWords(
        input: String,
        list: SuggestionList,
        word: StringBuilder,
        allowPossiblyOffensive: Boolean,
        maxEditDistance: Int,
        deletionCost: Int = 0,
        insertionCost: Int = 0,
        substitutionCost: Int = 0,
        pos: Int = -1
    ) {
        val isInitialCall = pos <= -1
        if (!isInitialCall) {
            word.append(char)
        }
        val costSum = deletionCost + insertionCost + substitutionCost
        val nextPos = pos + 1

        val consumedWholeInput = !isInitialCall && nextPos == input.length
        if (consumedWholeInput && isWord && (allowPossiblyOffensive || !isPossiblyOffensive)) {
            // Shifting right by costSum halves the frequency once per edit.
            list.add(word.toString(), freq shr costSum)
        }

        when {
            isInitialCall -> {
                // Every first character is tried at input position 0 without any cost.
                higherOrderChildren.forEach { child ->
                    child.listSimilarWords(
                        input, list, word, allowPossiblyOffensive, maxEditDistance, 0, 0, 0, 0
                    )
                }
            }
            costSum == maxEditDistance -> {
                // Edit budget exhausted: only an exact match of the next character may follow.
                if (nextPos < input.length) {
                    val expected = input[nextPos]
                    sameOrderChildren.firstOrNull { it.char == expected }?.listSimilarWords(
                        input, list, word, allowPossiblyOffensive, maxEditDistance,
                        deletionCost, insertionCost, substitutionCost, nextPos
                    )
                }
            }
            else -> {
                // Delete: skip one input character and continue with the one after it.
                if (pos + 2 < input.length) {
                    val afterSkipped = input[pos + 2]
                    sameOrderChildren.firstOrNull { it.char == afterSkipped }?.listSimilarWords(
                        input, list, word, allowPossiblyOffensive, maxEditDistance,
                        deletionCost + 1, insertionCost, substitutionCost, pos + 2
                    )
                }
                for (child in sameOrderChildren) {
                    val hasNextInputChar = nextPos < input.length
                    if (hasNextInputChar && child.char == input[nextPos]) {
                        // Exact match, no additional cost.
                        child.listSimilarWords(
                            input, list, word, allowPossiblyOffensive, maxEditDistance,
                            deletionCost, insertionCost, substitutionCost, nextPos
                        )
                        continue
                    }
                    // Insert: take the child's character without advancing in the input.
                    child.listSimilarWords(
                        input, list, word, allowPossiblyOffensive, maxEditDistance,
                        deletionCost, insertionCost + 1, substitutionCost, pos
                    )
                    if (hasNextInputChar) {
                        // Substitute: take the child's character in place of the next input one.
                        child.listSimilarWords(
                            input, list, word, allowPossiblyOffensive, maxEditDistance,
                            deletionCost, insertionCost, substitutionCost + 1, nextPos
                        )
                    }
                }
            }
        }

        if (!isInitialCall) {
            // Undo the append from the top of this call.
            word.setLength(word.length - 1)
        }
    }

    /**
     * Adds the word ending at this node (if it is a word node and passes the offensive filter)
     * as well as all words reachable through [sameOrderChildren] to [list].
     *
     * [word] must hold the characters leading up to, but not including, this node. It is
     * restored to that state before this function returns.
     */
    fun listAllSameOrderWords(list: SuggestionList, word: StringBuilder, allowPossiblyOffensive: Boolean) {
        word.append(char)
        if (isWord && (allowPossiblyOffensive || !isPossiblyOffensive)) {
            list.add(word.toString(), freq)
        }
        sameOrderChildren.forEach { child ->
            child.listAllSameOrderWords(list, word, allowPossiblyOffensive)
        }
        word.setLength(word.length - 1)
    }

    companion object {
        const val FREQ_CHARACTER = -1
        const val FREQ_WORD_MIN = 0
        const val FREQ_WORD_MAX = 255
        const val FREQ_WORD_FILLER = -2
        const val FREQ_IS_POSSIBLY_OFFENSIVE = 0
    }
}
|
||||
|
||||
/**
 * Read-only [LanguageModel] implementation for [String] tokens and [Int] frequencies, backed by
 * an [NgramTree].
 *
 * @param initTreeObj The tree to use as backing storage. If null, a new empty [NgramTree] is
 *  created.
 */
open class FlorisLanguageModel(
    initTreeObj: NgramTree? = null
) : LanguageModel<String, Int> {
    protected val ngramTree: NgramTree = initTreeObj ?: NgramTree()

    /**
     * Returns the n-gram for the passed [tokens].
     *
     * @throws NullPointerException if no matching n-gram exists.
     */
    override fun getNgram(vararg tokens: String): Ngram<String, Int> {
        // contentToString() renders the actual tokens; interpolating the array directly would
        // only print its identity.
        return getNgramOrNull(*tokens) ?: throw NullPointerException(
            "No n-gram found matching the given tokens: ${tokens.contentToString()}"
        )
    }

    /**
     * Returns the n-gram whose tokens match those of [ngram], ignoring the frequency of [ngram].
     *
     * @throws NullPointerException if no matching n-gram exists.
     */
    override fun getNgram(ngram: Ngram<String, Int>): Ngram<String, Int> {
        // Ngram does not override toString(), so report its tokens instead.
        return getNgramOrNull(ngram) ?: throw NullPointerException(
            "No n-gram found matching the given ngram: ${ngram.tokens}"
        )
    }

    /**
     * Looks up the passed [tokens] word by word, starting at the root of the tree.
     *
     * @return The matching n-gram carrying the frequency of the node of the last token, or null
     *  if any token could not be found or if no tokens were passed.
     */
    override fun getNgramOrNull(vararg tokens: String): Ngram<String, Int>? {
        // An Ngram needs at least one token; without this guard its constructor would throw
        // instead of this function returning null.
        if (tokens.isEmpty()) return null
        var currentNode: NgramNode = ngramTree
        for (token in tokens) {
            currentNode = currentNode.findWord(token) ?: return null
        }
        return Ngram(tokens.map { Token(it) }, currentNode.freq)
    }

    /**
     * Looks up the tokens of [ngram], ignoring its frequency. Returns null if there is no match.
     */
    override fun getNgramOrNull(ngram: Ngram<String, Int>): Ngram<String, Int>? {
        return getNgramOrNull(*ngram.tokens.toStringList().toTypedArray())
    }

    /**
     * Checks whether the tokens of [ngram] exist in this model. If [doMatchFreq] is true, the
     * stored frequency must also equal the frequency of [ngram].
     */
    override fun hasNgram(ngram: Ngram<String, Int>, doMatchFreq: Boolean): Boolean {
        val match = getNgramOrNull(ngram) ?: return false
        return !doMatchFreq || ngram.freq == match.freq
    }

    /**
     * Collects suggestions for the last token of [ngram]. All tokens before the last one must
     * exist as words in the tree (each looked up below the previous one); otherwise an empty
     * list is returned. An empty last token yields no suggestions.
     *
     * First, all words starting with the last token are collected. If that yields fewer than
     * [maxTokenCount] results, words similar to the last token (up to [maxEditDistance] edits)
     * are appended.
     *
     * NOTE(review): the second pass may add words the first pass already found and the combined
     * list may exceed [maxTokenCount] - confirm whether callers rely on this.
     */
    override fun matchAllNgrams(
        ngram: Ngram<String, Int>,
        maxEditDistance: Int,
        maxTokenCount: Int,
        allowPossiblyOffensive: Boolean
    ): List<Token<String>> {
        val ngramList = mutableListOf<Token<String>>()
        var currentNode: NgramNode = ngramTree
        val lastIndex = ngram.tokens.lastIndex
        for ((t, token) in ngram.tokens.withIndex()) {
            val word = token.data
            if (t < lastIndex) {
                // Context word: must exist exactly, else there is nothing to suggest.
                currentNode = currentNode.findWord(word) ?: return ngramList
                continue
            }
            if (word.isEmpty()) {
                continue
            }
            // The last word is not complete, so find all possible completions first.
            val prefixNode = findPrefixNode(currentNode, word)
            if (prefixNode != null) {
                val wordNodes = SuggestionList.new(maxTokenCount)
                // listAllSameOrderWords() appends the char of prefixNode itself, thus the
                // builder starts with the word minus its last character.
                val strBuilder = StringBuilder().append(word.substring(0, word.length - 1))
                prefixNode.listAllSameOrderWords(wordNodes, strBuilder, allowPossiblyOffensive)
                ngramList.addAll(wordNodes.map { Token(it) })
            }
            if (ngramList.size < maxTokenCount) {
                val wordNodes = SuggestionList.new(maxTokenCount)
                val strBuilder = StringBuilder()
                currentNode.listSimilarWords(word, wordNodes, strBuilder, allowPossiblyOffensive, maxEditDistance)
                ngramList.addAll(wordNodes.map { Token(it) })
            }
        }
        return ngramList
    }

    /**
     * Walks from [start] along the characters of [prefix] (first character via the higher order
     * children, all others via the same order children) and returns the node reached, or null
     * if a character has no matching child. Unlike [NgramNode.findWord] the reached node does
     * not have to be a word node.
     */
    private fun findPrefixNode(start: NgramNode, prefix: String): NgramNode? {
        var node: NgramNode = start
        for ((pos, char) in prefix.withIndex()) {
            val children = if (pos == 0) node.higherOrderChildren else node.sameOrderChildren
            node = children.find { it.char == char } ?: return null
        }
        return node
    }

    /** Creates a [FlorisMutableLanguageModel] sharing the same backing tree. */
    fun toFlorisMutableLanguageModel(): FlorisMutableLanguageModel = FlorisMutableLanguageModel(ngramTree)
}
|
||||
|
||||
/**
 * Mutable counterpart of [FlorisLanguageModel]. None of the mutating operations are implemented
 * yet; each of them fails through [TODO].
 *
 * @param initTreeObj The tree to use as backing storage, forwarded to [FlorisLanguageModel].
 */
open class FlorisMutableLanguageModel(
    initTreeObj: NgramTree? = null
) : FlorisLanguageModel(initTreeObj), MutableLanguageModel<String, Int> {
    /** Creates a [FlorisLanguageModel] sharing the same backing tree. */
    fun toFlorisLanguageModel(): FlorisLanguageModel {
        return FlorisLanguageModel(ngramTree)
    }

    override fun insertNgram(ngram: Ngram<String, Int>) {
        TODO("Not yet implemented")
    }

    override fun updateNgram(ngram: Ngram<String, Int>) {
        TODO("Not yet implemented")
    }

    override fun deleteNgram(ngram: Ngram<String, Int>) {
        TODO("Not yet implemented")
    }
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.keyboard
|
||||
|
||||
import dev.patrickgold.florisboard.ime.core.Subtype
|
||||
import dev.patrickgold.florisboard.ime.text.key.*
|
||||
|
||||
/**
 * Set of queries about keyboard state and individual keys.
 *
 * NOTE(review): presumably consulted while computing keys for display - confirm against the
 * callers, which are not part of this file.
 *
 * [evaluateCharHalfWidth], [evaluateKanaKata] and [evaluateKanaSmall] return false unless
 * overridden; every other member must be provided by the implementation.
 */
interface ComputingEvaluator {
    // --- Caps state, globally and for a specific key ---

    fun evaluateCaps(): Boolean

    fun evaluateCaps(data: KeyData): Boolean

    // --- Optional state flags, false by default ---

    fun evaluateCharHalfWidth(): Boolean {
        return false
    }

    fun evaluateKanaKata(): Boolean {
        return false
    }

    fun evaluateKanaSmall(): Boolean {
        return false
    }

    // --- Per-key enabled / visible state ---

    fun evaluateEnabled(data: KeyData): Boolean

    fun evaluateVisible(data: KeyData): Boolean

    // --- Environment accessors ---

    fun getActiveSubtype(): Subtype

    fun getKeyVariation(): KeyVariation

    fun getKeyboard(): Keyboard

    // --- Slot handling ---

    fun isSlot(data: KeyData): Boolean

    fun getSlotData(data: KeyData): KeyData?
}
|
||||
|
||||
/**
 * Fallback [ComputingEvaluator] with fixed answers: every state flag is off, every key is
 * enabled and visible, no key is a slot, and the subtype and key variation are
 * [Subtype.DEFAULT] and [KeyVariation.NORMAL]. There is no keyboard to return, thus
 * [getKeyboard] always throws [NotImplementedError].
 */
object DefaultComputingEvaluator : ComputingEvaluator {
    override fun evaluateCaps(): Boolean {
        return false
    }

    override fun evaluateCaps(data: KeyData): Boolean {
        return false
    }

    override fun evaluateCharHalfWidth(): Boolean {
        return false
    }

    override fun evaluateKanaKata(): Boolean {
        return false
    }

    override fun evaluateKanaSmall(): Boolean {
        return false
    }

    override fun evaluateEnabled(data: KeyData): Boolean {
        return true
    }

    override fun evaluateVisible(data: KeyData): Boolean {
        return true
    }

    override fun getActiveSubtype(): Subtype {
        return Subtype.DEFAULT
    }

    override fun getKeyVariation(): KeyVariation {
        return KeyVariation.NORMAL
    }

    override fun getKeyboard(): Keyboard {
        throw NotImplementedError()
    }

    override fun isSlot(data: KeyData): Boolean {
        return false
    }

    override fun getSlotData(data: KeyData): KeyData? {
        return null
    }
}
|
||||
@@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.keyboard
|
||||
|
||||
import android.content.Context
|
||||
import android.inputmethodservice.InputMethodService
|
||||
import android.media.AudioManager
|
||||
import android.os.Build
|
||||
import android.os.VibrationEffect
|
||||
import android.os.Vibrator
|
||||
import android.provider.Settings
|
||||
import android.view.HapticFeedbackConstants
|
||||
import dev.patrickgold.florisboard.debug.flogDebug
|
||||
import dev.patrickgold.florisboard.ime.core.Preferences
|
||||
import dev.patrickgold.florisboard.ime.text.key.KeyCode
|
||||
import dev.patrickgold.florisboard.ime.text.keyboard.TextKeyData
|
||||
|
||||
/**
 * Performs audio and haptic feedback for user interactions, honoring both the FlorisBoard
 * preferences and (unless configured to ignore them) the system sound / haptic settings.
 *
 * Instances are created through [new]. Each public function corresponds to one kind of user
 * interaction and triggers audio and/or haptic feedback if the respective feature preference is
 * enabled, scaled by an interaction specific factor.
 */
class InputFeedbackManager private constructor(private val ims: InputMethodService) {
    private val prefs get() = Preferences.default()

    // Both services are optional; if a lookup does not yield the expected type the respective
    // kind of feedback is skipped.
    private val audioManager = ims.getSystemService(Context.AUDIO_SERVICE) as? AudioManager
    private val vibrator = ims.getSystemService(Context.VIBRATOR_SERVICE) as? Vibrator
    private val contentResolver = ims.contentResolver

    /** Feedback for a regular key press (full intensity). */
    fun keyPress(data: KeyData = TextKeyData.UNSPECIFIED) {
        if (prefs.inputFeedback.audioFeatKeyPress) {
            performAudioFeedback(data, 1.0)
        }
        if (prefs.inputFeedback.hapticFeatKeyPress) {
            performHapticFeedback(data, 1.0)
        }
    }

    /** Feedback for a key long press. */
    fun keyLongPress(data: KeyData = TextKeyData.UNSPECIFIED) {
        if (prefs.inputFeedback.audioFeatKeyLongPress) {
            performAudioFeedback(data, 0.7)
        }
        if (prefs.inputFeedback.hapticFeatKeyLongPress) {
            performHapticFeedback(data, 0.4)
        }
    }

    /** Feedback for a repeated key action. */
    fun keyRepeatedAction(data: KeyData = TextKeyData.UNSPECIFIED) {
        if (prefs.inputFeedback.audioFeatKeyRepeatedAction) {
            performAudioFeedback(data, 0.4)
        }
        if (prefs.inputFeedback.hapticFeatKeyRepeatedAction) {
            performHapticFeedback(data, 0.05)
        }
    }

    /** Feedback for a swipe gesture. */
    fun gestureSwipe(data: KeyData = TextKeyData.UNSPECIFIED) {
        if (prefs.inputFeedback.audioFeatGestureSwipe) {
            performAudioFeedback(data, 0.7)
        }
        if (prefs.inputFeedback.hapticFeatGestureSwipe) {
            performHapticFeedback(data, 0.4)
        }
    }

    /** Feedback for a moving swipe gesture. */
    fun gestureMovingSwipe(data: KeyData = TextKeyData.UNSPECIFIED) {
        if (prefs.inputFeedback.audioFeatGestureMovingSwipe) {
            performAudioFeedback(data, 0.4)
        }
        if (prefs.inputFeedback.hapticFeatGestureMovingSwipe) {
            performHapticFeedback(data, 0.05)
        }
    }

    /**
     * Reads the integer system setting [id] and returns true if it is non-zero. Returns false
     * if the setting is not present or no content resolver is available.
     */
    private fun systemPref(id: String): Boolean {
        val resolver = contentResolver ?: return false
        return Settings.System.getInt(resolver, id, 0) != 0
    }

    /**
     * Plays the key press sound effect matching the key code of [data]. The volume is the
     * configured audio volume (in percent) scaled by [factor]; nothing is played if the
     * resulting volume is outside of 0.01..1.00.
     */
    private fun performAudioFeedback(data: KeyData, factor: Double) {
        val manager = audioManager ?: return
        if (!prefs.inputFeedback.audioEnabled) return
        if (!prefs.inputFeedback.audioIgnoreSystemSettings &&
            !systemPref(Settings.System.SOUND_EFFECTS_ENABLED)
        ) {
            return
        }

        val volume = (prefs.inputFeedback.audioVolume * factor) / 100.0
        val effect = when (data.code) {
            KeyCode.DELETE -> AudioManager.FX_KEYPRESS_DELETE
            KeyCode.ENTER -> AudioManager.FX_KEYPRESS_RETURN
            KeyCode.SPACE -> AudioManager.FX_KEYPRESS_SPACEBAR
            else -> AudioManager.FX_KEYPRESS_STANDARD
        }
        if (volume < 0.01 || volume > 1.00) return
        flogDebug { "Perform audio with volume=$volume and effect=$effect" }
        manager.playSoundEffect(effect, volume.toFloat())
    }

    /**
     * Performs haptic feedback scaled by [factor]. Unless the preferences request to use the
     * vibrator directly, the view based haptic feedback of the input window is tried first and
     * the vibrator only serves as a fallback if that was not performed.
     */
    private fun performHapticFeedback(data: KeyData, factor: Double) {
        val vib = vibrator
        if (vib == null || !vib.hasVibrator()) return
        if (!prefs.inputFeedback.hapticEnabled) return
        if (!prefs.inputFeedback.hapticIgnoreSystemSettings &&
            !systemPref(Settings.System.HAPTIC_FEEDBACK_ENABLED)
        ) {
            return
        }

        if (!prefs.inputFeedback.hapticUseVibrator) {
            val view = ims.window?.window?.decorView ?: return
            // TEXT_HANDLE_MOVE is only used for reduced-intensity feedback on O_MR1 and newer.
            val useLightConstant = factor < 1.0 && Build.VERSION.SDK_INT >= Build.VERSION_CODES.O_MR1
            val hfc = when {
                useLightConstant -> HapticFeedbackConstants.TEXT_HANDLE_MOVE
                else -> HapticFeedbackConstants.KEYBOARD_TAP
            }
            val flags = HapticFeedbackConstants.FLAG_IGNORE_VIEW_SETTING or
                HapticFeedbackConstants.FLAG_IGNORE_GLOBAL_SETTING
            if (view.performHapticFeedback(hfc, flags)) return
            // If not performed fall back to using the vibrator directly
        }

        val duration = prefs.inputFeedback.hapticVibrationDuration
        if (duration == 0) return
        val effectiveDuration = (duration * factor).toLong().coerceAtLeast(1L)
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            val canControlAmplitude = vib.hasAmplitudeControl()
            val strength = if (canControlAmplitude) {
                prefs.inputFeedback.hapticVibrationStrength
            } else {
                VibrationEffect.DEFAULT_AMPLITUDE
            }
            if (strength != 0) {
                val effectiveStrength = if (canControlAmplitude) {
                    // Map the scaled percentage onto the 1..255 amplitude range.
                    (255.0 * ((strength * factor) / 100.0)).toInt().coerceIn(1, 255)
                } else {
                    strength
                }
                flogDebug { "Perform haptic with duration=$effectiveDuration and strength=$effectiveStrength" }
                val effect = VibrationEffect.createOneShot(effectiveDuration, effectiveStrength)
                vib.vibrate(effect)
            }
        } else {
            flogDebug { "Perform haptic with duration=$effectiveDuration" }
            @Suppress("DEPRECATION")
            vib.vibrate(effectiveDuration)
        }
    }

    companion object {
        /** Creates a new [InputFeedbackManager] bound to the given input method service. */
        fun new(ims: InputMethodService): InputFeedbackManager = InputFeedbackManager(ims)
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user