Compare commits

..

206 Commits

Author SHA1 Message Date
Patrick Goldinger
a0de409878 Release v0.3.13-beta06 2021-07-15 21:29:13 +02:00
Patrick Goldinger
3f0944906d Merge pull request #1069 from florisboard/spelling2
Spell checker Part 2: Bug fixes, feedback incorporation etc.
2021-07-15 20:22:44 +02:00
Patrick Goldinger
79ef5445a1 Fix dictionaries with legacy encodings crashing the app (#1064)
Especially Cyrillic (ISO8859-7)
2021-07-15 18:10:58 +02:00
Patrick Goldinger
dea2795499 Fix Czech dictionary import failing for XPI archives (#1064) 2021-07-15 17:31:17 +02:00
Patrick Goldinger
650e4fb3a9 Fix FreeOffice link not working for some locales (#1064) 2021-07-14 21:14:22 +02:00
Patrick Goldinger
29a630dcd1 Fix importer not detecting license for some Mozilla XPI files (#1064) 2021-07-14 20:47:02 +02:00
Patrick Goldinger
7733ea0c02 Merge pull request #1021 from empratyush/master
added support for direct boot
2021-07-14 20:34:29 +02:00
Pratyush
3d13d65c52 added support for direct boot 2021-07-14 22:34:07 +05:30
Patrick Goldinger
575058550a Fix auto capitalization pref incorrectly labeled (#1064) 2021-07-14 18:55:55 +02:00
Patrick Goldinger
ad3e3cb7ec Release v0.3.13-beta05 2021-07-12 20:01:19 +02:00
Patrick Goldinger
e24ca7ca4a Update translations from Crowdin 2021-07-12 19:33:49 +02:00
Patrick Goldinger
1b6d8c8f6d Merge pull request #1061 from GoRaN909/patch-6
Update kurdish_kurmanci.json
2021-07-12 19:22:25 +02:00
Patrick Goldinger
27e172cbe3 Merge pull request #1063 from florisboard/icu-header-fix
Extend build script to remove unused ICU header files
2021-07-12 19:03:19 +02:00
Patrick Goldinger
e40c720f99 Extend build script to remove unused ICU header files
Realized they are quite a lot lol
2021-07-12 18:48:48 +02:00
GoRaN
c8d7071741 Update kurdish_kurmanci.json
Replaced non-Kurmanci character "ı" with the correct Latin letter "i"
2021-07-10 22:22:58 +03:00
Patrick Goldinger
5c2154253d Merge pull request #1015 from jeremiah-miller/optimized-swype-layouts
Add keyboard layouts optimized for swype input
2021-07-10 20:46:00 +02:00
Patrick Goldinger
3c79cca77c Fix cut off hints in borderless themes (#1049) 2021-07-10 19:53:54 +02:00
Patrick Goldinger
65c0ab724f Merge pull request #1011 from X-yl/improvements
Code cleanup + some polish
2021-07-10 19:17:49 +02:00
Patrick Goldinger
d5d259e13e Merge pull request #1052 from florisboard/spelling
Add experimental spell checker & extension support
2021-07-10 19:06:55 +02:00
Patrick Goldinger
691d3929eb Prepare spell checker for experimental release on beta 2021-07-10 19:03:36 +02:00
Patrick Goldinger
57b3b7b5d7 Add spelling correction cache 2021-07-10 17:55:07 +02:00
Patrick Goldinger
1582c1a3cf Re-package ICU lib to reduce APK size a bit 2021-07-10 16:52:55 +02:00
Patrick Goldinger
e22fe940c1 Merge pull request #1057 from Luensche/patch-1
Link files in CONTRIBUTING.md
2021-07-10 02:53:44 +02:00
Björn Engel
7f19892444 Link files in CONTRIBUTING.md 2021-07-09 08:44:46 +02:00
Patrick Goldinger
123a016ec0 Raise import limit from 20 to 24 MiB 2021-07-08 01:27:41 +02:00
Patrick Goldinger
5b6dcb3bc4 Fix some issues with manual import 2021-07-08 01:15:45 +02:00
Patrick Goldinger
8d71200b66 Add manual aff/dic import 2021-07-07 21:10:03 +02:00
Patrick Goldinger
6d333d2b40 Remove language debug output code 2021-07-07 03:47:42 +02:00
Patrick Goldinger
baacfd4469 Rework data filters, add more debug logging
(For investigating crashes for some dictionary files)
2021-07-07 03:44:53 +02:00
Patrick Goldinger
e8925ce697 Add ICU data filter (reduce data library size by 10MB / arch) 2021-07-06 03:18:25 +02:00
Patrick Goldinger
e40c2a6736 Re-add ICU as git submodule
No more 2.5mil LOC added lol
2021-07-05 19:36:18 +02:00
x-yl
b9518dc92b Cache ideal gesture lengths 2021-07-05 11:24:47 +04:00
x-yl
47f26f2336 Make gesture trail time based 2021-07-05 11:24:47 +04:00
Patrick Goldinger
fbc8d98209 Add user dictionary lookup for spell checking 2021-07-03 20:52:15 +02:00
Patrick Goldinger
27aeda8921 Implement suggestions limit / Fix initial indexing bug 2021-07-03 12:29:38 +02:00
Patrick Goldinger
4c2e642a85 Temporarily disable x86/x86_64 support to reduce APK size 2021-07-01 21:33:18 +02:00
Patrick Goldinger
f8995827f6 Raise max spelling dict size limit from 6MiB to 16MiB 2021-07-01 20:40:34 +02:00
Patrick Goldinger
d7593d12f2 Remove Home tab current-word paragraph 2021-07-01 20:38:24 +02:00
Patrick Goldinger
cd471a8323 Update README.md and open source licenses 2021-07-01 19:26:42 +02:00
Patrick Goldinger
9ad962c7d0 Remove unused flict binary files 2021-07-01 19:18:01 +02:00
Patrick Goldinger
b4e16ca445 Remove hunspell library 2021-07-01 19:06:46 +02:00
Patrick Goldinger
c2269fe23d Fix nuspell initialization error 2021-07-01 19:04:55 +02:00
Patrick Goldinger
d720435945 Add nuspell library 2021-07-01 18:47:30 +02:00
Patrick Goldinger
e33b0d39f9 Add external glob implementation 2021-07-01 18:45:43 +02:00
Patrick Goldinger
bbf3fb96be Add pre-compiled ICU library files 2021-07-01 18:44:59 +02:00
Patrick Goldinger
09567234cd Add ICU4C library
Frickin finally, took me waaaay too long to add.
If there just was an NDK ICU library wrapper, this could have saved
me 4 days of work...
2021-07-01 17:19:41 +02:00
Patrick Goldinger
1c2179fc50 Fix UI theme/state bugs in the spelling activity 2021-06-25 19:03:08 +02:00
Patrick Goldinger
c7fff5d9e4 Add support for Firefox XPI dictionaries 2021-06-25 18:19:55 +02:00
Patrick Goldinger
25badd6c2e Add dictionary delete option 2021-06-25 17:08:00 +02:00
Patrick Goldinger
97fb7b9427 Fix some bugs in the JNI side of spellchecking 2021-06-25 16:24:54 +02:00
Patrick Goldinger
f9b1aba27d Add spelling extension / Fix spelling service 2021-06-25 03:42:34 +02:00
Patrick Goldinger
aa0b9acabc Add JNI/Java direct string passing implementation 2021-06-25 03:41:54 +02:00
Patrick Goldinger
67b3ae5170 Add JNI logging utils 2021-06-25 03:41:04 +02:00
Patrick Goldinger
7d796ebdb3 Add spelling dictionary indexer 2021-06-24 18:16:38 +02:00
Patrick Goldinger
5737e68b8f Make FlorisApplication crash loop safe 2021-06-24 18:15:49 +02:00
Patrick Goldinger
211019b78b Add FlorisRef unit test 2021-06-24 16:53:55 +02:00
Patrick Goldinger
1db6676c45 Add extension load/write support, improve FlorisRef 2021-06-22 19:31:51 +02:00
Patrick Goldinger
da7ae028bf Add interface for extension packages 2021-06-18 03:40:47 +02:00
Patrick Goldinger
f3aa739e72 Introduce new 'FlorisRef' class as replacement for 'AssetRef' 2021-06-17 16:40:06 +02:00
Patrick Goldinger
7f09d1a1d1 Rename package 'extension' to 'res' and move out of 'ime' 2021-06-17 14:52:27 +02:00
Patrick Goldinger
5a8483e78d Add import dictionary basic implementation 2021-06-16 21:32:49 +02:00
Jeremiah Miller
841d15056d Rename DGHP to Sangaline 2021-06-14 08:32:02 -07:00
Patrick Goldinger
09cdd0fff0 Add basic import dictionary UI sheet 2021-06-12 19:52:48 +02:00
Patrick Goldinger
ebb677d203 Add overview page for spell checker prefs 2021-06-12 12:28:21 +02:00
Patrick Goldinger
cf3236f57f Remove preferences cache
SharedPreferences already caches the preference values + the cache causes
state bugs.
2021-06-11 16:27:31 +02:00
bbgun7
3bd8169600 Add keyboard layouts optimized for swipe input 2021-06-10 10:11:56 -07:00
Patrick Goldinger
f9aaec6020 Add initial spelling manager activity 2021-06-10 16:29:44 +02:00
Patrick Goldinger
bb2cc995d6 Add JNI string utils 2021-06-10 16:29:11 +02:00
Patrick Goldinger
a65aaa5f95 Add spelling manager / config 2021-06-10 16:28:50 +02:00
Patrick Goldinger
92b9a978dc Add skeleton code for spell-checker implementation 2021-06-08 04:22:47 +02:00
Patrick Goldinger
5f2729e065 Add Hunspell ported code 2021-06-08 04:22:02 +02:00
Patrick Goldinger
37bb4cea43 Release v0.3.13-beta04 2021-06-08 00:05:36 +02:00
Patrick Goldinger
79d608feea Update translations from Crowdin 2021-06-07 23:53:37 +02:00
Patrick Goldinger
54573de3e3 Merge pull request #1006 from Luensche/move_clipboard_item_to_begin
Move new clipboard items with the same content to the beginning
2021-06-07 23:51:35 +02:00
x-yl
a2243b8825 Use coroutines and improve image loading for clipboard
Sorry, didn't know we were using coroutines when I first wrote this
code!
2021-06-07 17:50:21 +04:00
Björn Engel
2fba2d3b4a Do not compare images 2021-06-07 15:49:05 +02:00
Björn Engel
fd0cbbdcb1 Move new clipboard items with the same content to the beginning, closes #991 2021-06-07 14:08:39 +02:00
Patrick Goldinger
b6e3deedf4 Add default system subtype for proper display in system settings 2021-06-04 19:39:02 +02:00
Patrick Goldinger
4c74bf1b4a Fix glide typing not working for caps/caps-lock 2021-06-04 19:19:53 +02:00
Patrick Goldinger
2a4e3c8c58 Merge pull request #982 from dessalines/halmak
Add the Halmak keyboard layout
2021-06-04 18:56:48 +02:00
Patrick Goldinger
e34e5b4260 Merge pull request #992 from florisboard/rework-textkeyboard-rendering
Rework TextKeyboard rendering
2021-06-04 18:53:42 +02:00
Patrick Goldinger
ae2df7dfe4 Fix Smartbar incorrectly not updating selection-specific keys 2021-06-04 18:49:48 +02:00
Patrick Goldinger
1b3d0a5cf2 Fix touch logic incorrect pointer and capacity issues 2021-06-04 18:31:05 +02:00
Patrick Goldinger
4c94329071 Fix glide typing not correctly initialized at startup 2021-06-04 17:13:16 +02:00
Patrick Goldinger
6ffcf2f865 Fix keyboard preview in Settings 2021-06-04 05:43:42 +02:00
Patrick Goldinger
e2c9a66880 Fix further state bugs 2021-06-04 05:12:38 +02:00
Patrick Goldinger
e9bc25ebc7 Improve extended popup rendering performance 2021-06-04 03:53:03 +02:00
Patrick Goldinger
6379e63669 Rework TextKeyboard rendering 2021-06-04 03:31:46 +02:00
Patrick Goldinger
70a0763e7f Merge pull request #981 from florisboard/fix-keyboard-state-bug
Fix keyboard state bug for the active mode
2021-06-04 03:29:47 +02:00
Dessalines
863080e6ce Remove slash from bottom row. 2021-06-03 14:46:55 -04:00
Patrick Goldinger
3ef454b8bd Fix Smartbar not showing sometimes (#987) 2021-06-03 17:43:23 +02:00
Patrick Goldinger
2bbdfc71d0 Rework UI initialization and reduce duplicate state changes 2021-06-03 15:42:28 +02:00
Patrick Goldinger
d1c783dde1 Fix keyboard state bug for the active mode 2021-06-02 17:51:18 +02:00
Dessalines
644da67601 Add the Halmak keyboard layout 2021-06-01 21:39:26 -04:00
Patrick Goldinger
b8d99efd29 Merge pull request #977 from GoRaN909/patch-5
Update kurdish.json
2021-06-01 01:15:58 +02:00
GoRaN
4067d92a44 Update kurdish.json
Added stretched button (Kashida) to support all Kurdish layouts.
2021-06-01 01:06:00 +03:00
Patrick Goldinger
13a17f3a6b Merge pull request #974 from GoRaN909/patch-2
Update ckb.json
2021-05-31 23:50:01 +02:00
Patrick Goldinger
57c679e500 Merge pull request #975 from GoRaN909/patch-3
Update kurdish_standard.json
2021-05-31 23:41:42 +02:00
Patrick Goldinger
f70f45dab6 Merge pull request #973 from GoRaN909/patch-1
Update kurdish.json
2021-05-31 23:37:41 +02:00
GoRaN
8d8f723d66 Update kurdish_standard.json
popup characters added
2021-06-01 00:29:23 +03:00
GoRaN
7c3c6a7ad7 Update ckb.json
Added popup characters for letter (ح)
2021-06-01 00:24:18 +03:00
GoRaN
d7a1c9377a Update kurdish.json
Some changes of words position and corrections codes
2021-06-01 00:19:19 +03:00
Patrick Goldinger
2a317372b2 Release v0.3.13-beta03 2021-05-31 20:18:43 +02:00
Patrick Goldinger
402f7bd267 Update translations from Crowdin 2021-05-31 20:02:33 +02:00
Patrick Goldinger
e8eb6e3068 Fix emoticon layout missing (#950) 2021-05-31 19:17:38 +02:00
Patrick Goldinger
3dd9c45777 Fix crash when using delete left swipe in raw editors (#967) 2021-05-31 18:30:24 +02:00
Patrick Goldinger
7255229361 Merge pull request #966 from florisboard/major-input-logic-overhaul
Major input logic overhaul
2021-05-31 17:52:19 +02:00
Patrick Goldinger
4d2fa29886 Fix IME checking utility not using new ID 2021-05-31 12:46:14 +02:00
Patrick Goldinger
ef90faf98b Merge pull request #963 from Hayleia/composingFix
Fix getting composer from name
2021-05-31 06:11:38 +02:00
Patrick Goldinger
82caa8365e Fix glide trail stuck after initial touch down 2021-05-31 05:16:20 +02:00
Patrick Goldinger
391257e9e9 Re-add simple key shadows 2021-05-31 05:04:02 +02:00
Patrick Goldinger
b082253167 Fix keys not registered correctly (#953) 2021-05-31 03:59:31 +02:00
Patrick Goldinger
8df701e3fe Adapt input view to new keyboard state register 2021-05-31 03:56:08 +02:00
Patrick Goldinger
9f232f5dbf Add new keyboard state register 2021-05-31 03:55:05 +02:00
Hayleia
7017726dcb Fix getting composer from name
also use an available constant when possible rather than a hardcoded string
2021-05-30 11:05:28 +02:00
Patrick Goldinger
b48ca8fd1e Restructure the package structure 2021-05-28 21:04:27 +02:00
Patrick Goldinger
88d5e15a5e Introduce TextKeyboardState 2021-05-28 03:36:54 +02:00
Patrick Goldinger
e9537cbd1d Merge pull request #947 from yashpalgoyal1304/devanagari-fix
Fixed Devanagari Codes
2021-05-26 23:32:10 +02:00
yashpalgoyal1304
8e216bf3ac Fixed Devanagari Codes 2021-05-27 02:37:14 +05:30
Patrick Goldinger
63352cc615 Improve logic and rendering performance a bit 2021-05-26 17:12:28 +02:00
Patrick Goldinger
e9e2563739 Release v0.3.13-beta02 2021-05-26 01:26:33 +02:00
Patrick Goldinger
87bb098445 Fix batch level preventing cached input from updating 2021-05-26 01:26:17 +02:00
Patrick Goldinger
da1944bedf Temporarily remove key shadow support (#943) 2021-05-26 01:09:50 +02:00
Patrick Goldinger
d4a92e0d46 Merge pull request #942 from florisboard/new-touch-logic
Introduce new touch logic to TextKeyboardView
2021-05-26 00:46:31 +02:00
yashpalgoyal1304
0fa6c1f235 Added Indic Numerals (#940)
* Indic Devanagari Numeric

* Fixed name and label

* Fixed file name

* Added indic scripts numerals
2021-05-26 00:43:21 +02:00
Patrick Goldinger
260b1ba5ca Improve touch logic 2021-05-26 00:19:35 +02:00
Patrick Goldinger
f0799a6a0e Rework text keyboard view touch logic 2021-05-25 20:48:17 +02:00
Patrick Goldinger
155238946a Merge pull request #866 from Hayleia/composing1
Composing input method (and Korean as the first subject)
2021-05-24 15:30:06 +02:00
Patrick Goldinger
45f91cf40c Merge pull request #928 from ostrya/fix-hint-merge
fix hint merge logic (#872)
2021-05-23 16:27:22 +02:00
Patrick Goldinger
94f5b56b6a Possibly fix key shadow performance 2021-05-23 16:19:28 +02:00
Kai Helbig
46db467073 fix hint merge logic (#872)
The merge of the hints depends on the underlying main key. Especially,
hints should only be shown for character keys, and if the hint is
identical to the main key, it should not be added at all. Since the
actual main key is only evaluated on demand with TextKey#compute, all
corresponding hint merge logic needs to be moved there too.
2021-05-23 12:16:33 +02:00
Patrick Goldinger
17dde536d9 Fix one-handed panel not correctly measuring sometimes (#896) 2021-05-23 03:50:17 +02:00
Patrick Goldinger
be67bf4b84 Fix Smartbar number row bugs in password fields (#905) 2021-05-23 03:19:17 +02:00
Patrick Goldinger
8f142548fe Merge pull request #920 from tsiflimagas/default-popup-fix-greek
Fix the default popup for some letters
2021-05-23 02:49:28 +02:00
Kostas Giapis
a68f439f39 Enforce the main popup character 2021-05-22 23:01:04 +03:00
Patrick Goldinger
7a0892bb36 Fix space bar text too large (#862) 2021-05-22 20:16:55 +02:00
Patrick Goldinger
8457390156 Fix keys not showing a shadow (#901, #921) 2021-05-22 19:54:12 +02:00
Hayleia
72be3898c1 move local function out, and fix firefox url bar? 2021-05-22 19:47:30 +02:00
Kostas Giapis
d35bf5af63 Fix the default popup for some letters 2021-05-22 16:23:13 +03:00
Patrick Goldinger
04d3af6484 Merge pull request #908 from Luensche/copy-versionstring
Copy version string to clipboard on click on the version
2021-05-22 12:59:46 +02:00
Björn Engel
26920e4a98 Move the toast outside of if 2021-05-20 14:44:23 +02:00
Björn Engel
7419966b51 Create ripple for click on head_area 2021-05-20 14:37:17 +02:00
Björn Engel
58b832c6c3 Add new area for long pressing and change to onLongClickListener 2021-05-20 10:20:49 +02:00
Hayleia
99f2ec1879 deprecated methods 2021-05-19 11:47:28 +02:00
Hayleia
4249f9ef86 add author 2021-05-19 11:39:13 +02:00
Hayleia
60107ae299 useless "public" keyword 2021-05-19 09:11:07 +02:00
Hayleia
6a95a865fa one spinner per linear layout 2021-05-19 09:09:14 +02:00
Hayleia
9e32589af5 style: space before colon 2021-05-19 09:04:30 +02:00
Hayleia
6133e225e1 add author 2021-05-19 09:03:34 +02:00
Hayleia
348c143d92 use case_selector to specify shift/non-shift characters 2021-05-19 08:59:52 +02:00
Hayleia
ce00785ffe Revert "support specifying uppercase and lowercase separately in json"
This reverts commit 1715e5ddfa.

Conflicts:
	app/src/main/java/dev/patrickgold/florisboard/ime/extension/AssetManager.kt
2021-05-19 08:24:51 +02:00
Hayleia
78cdce750d style in json 2021-05-19 08:22:25 +02:00
Patrick Goldinger
f3f95ae282 Fix crash loops from occurring after a crash (#910) 2021-05-19 01:33:53 +02:00
Björn Engel
018885eb30 Copy version string to clipboard on click on the version 2021-05-18 15:18:01 +02:00
Patrick Goldinger
c6c8a76dd6 Fix user dictionary max size (#898) 2021-05-18 01:51:49 +02:00
Patrick Goldinger
3cae8b7230 Release v0.3.13-beta01 2021-05-17 20:40:39 +02:00
Patrick Goldinger
814c8de0c2 Update translations from Crowdin 2021-05-17 20:30:37 +02:00
Patrick Goldinger
32fe175b48 Small code base improvements 2021-05-17 20:27:32 +02:00
Patrick Goldinger
b901f6de8d Fix space bar gestures for non-repeating actions (#886) 2021-05-17 20:13:50 +02:00
Patrick Goldinger
fe9ba3246c Merge pull request #884 from debnone/patch-1
Fix hebrew characters
2021-05-17 19:52:32 +02:00
Patrick Goldinger
71a39f0fc1 Merge pull request #876 from florisboard/android11-autofill-api
Add support for Android 11's Autofill API
2021-05-17 10:56:31 +02:00
Patrick Goldinger
f7556898e1 Document inline suggestions code / Fix some inconsistencies 2021-05-17 03:01:46 +02:00
Patrick Goldinger
578539f5d0 Add inline suggestions theme support 2021-05-17 02:04:52 +02:00
debnone
7c28c7fbea Fix hebrew characters
Fixed the bottom half of the layout; it was reversed and incorrect.
2021-05-15 23:17:28 +03:00
Patrick Goldinger
88bcadff81 Fix inline suggestions state bugs and improve logic 2021-05-15 04:50:49 +02:00
Patrick Goldinger
25e25dfbf0 Add support for Android 11's Autofill API 2021-05-15 03:23:51 +02:00
Patrick Goldinger
ba3dc0178d Merge pull request #875 from X-yl/glide-number-row
Reinitialize pruner when layout changes
2021-05-15 03:20:23 +02:00
x-yl
91e7f424bb Reinitialize pruner when layout changes
Closes #854
2021-05-14 22:16:10 +04:00
Hayleia
b89f791eb0 rename south korean layout 2021-05-14 07:51:51 +02:00
Hayleia
ad3a0425ab fix config.json after merge 2021-05-14 07:51:40 +02:00
Hayleia
7cf52ecf3e Merge branch 'master' of https://github.com/florisboard/florisboard into composing1 2021-05-14 07:35:56 +02:00
Patrick Goldinger
b1ef18f4fd Improve C++ code base 2021-05-14 00:30:19 +02:00
Hayleia
b74af5bbe9 manage old subtype configurations 2021-05-13 20:48:00 +02:00
Hayleia
b8aa4bbfc4 fix subtype equals and hashcode (and javadoc) 2021-05-13 20:16:50 +02:00
Hayleia
e024ac9272 fix default subtype crash with no subtype declared 2021-05-13 20:03:47 +02:00
Hayleia
c5fa027a8e move composer dropdown in add/edit subtype dialog 2021-05-13 16:39:32 +02:00
Hayleia
b6ec2b25be Merge branch 'master' of https://github.com/florisboard/florisboard into composing1 2021-05-13 16:25:13 +02:00
Patrick Goldinger
a756b59c60 Merge pull request #606 from ostrya/improved-hints
Merge hints more flexibly
2021-05-13 14:04:08 +02:00
Patrick Goldinger
8687ce55ed Merge pull request #527 from ostrya/neo2-layout
Neo2 layout
2021-05-13 14:04:01 +02:00
ostrya
1ac6985dd0 Allow merging popups of hints #618
A new configuration was introduced to allow showing the popup keys of
the hint keys of a given character key in addition to the character key's
normal popup keys.

The previous change allowed both number and symbol hint to be merged at
the same time, with the number hint being shown as popup only.
Therefore, when allowing the popups of the hint key to be shown as
popups, both hint keys need to be taken into account.

To ensure this and also take into account the separate key hint
settings for number and symbol hints, the MutablePopupSet was extended
to contain both hint keys as well as both lists of popup keys in
addition to the existing main key and relevant list. The logic that
chooses the key prioritization when rendering the popup has now also
been moved from the PopupManager to the PopupSet.

For performance, the prioritized collection of popup keys is generated
once and then cached for a given configuration in a new PopupKeys
object. This class now has the collection semantics previously present
in the PopupSet class. Different from before, the PopupKeys object now
explicitly contains the prioritized keys (those that should be shown
directly above the original key for easier reach) in order of priority.

The PopupManager now only needs to take the number of prioritized keys
(maximum 3: main key, number hint, symbol hint) when calculating the
key positions in the popup.
2021-05-13 11:52:53 +02:00
Patrick Goldinger
986b4a878f Merge pull request #858 from florisboard/java-jni-basics
Set up base for Kotlin/C++ interoperability
2021-05-13 00:33:10 +02:00
Patrick Goldinger
1ef38fe7f3 Fix GitHub workflows not setting up cmake 2021-05-12 20:31:34 +02:00
Patrick Goldinger
bcad0af35e Finalize base implementation for SuggestionList 2021-05-12 19:29:21 +02:00
Patrick Goldinger
b5b89fde4f Add native instance wrapper interface / Clean up code 2021-05-12 02:25:41 +02:00
Patrick Goldinger
be1fc710ed Set up base for Kotlin/C++ interoperability 2021-05-12 00:40:53 +02:00
Kai Helbig
aa55fd3070 Directly merge numeric and symbolic hints
Co-authored-by: Patrick Goldinger <patrick.goldinger@pm.me>
2021-05-11 23:58:31 +02:00
ostrya
a132462466 Merge hints more flexibly
To allow symbol layouts with the same or more rows as the character
layout to be hinted more consistently, the hinting of the numeric row
is split from the rest of the symbol layout.

If enabled, the numeric row hinting is always done in the first row.
If an actual numeric row is enabled as well, no additional numeric
hints will be shown (as they are only added to CHARACTER type keys).

The symbol hinting is now bottom-aligned: hints from the last symbol
row are shown in the last character row.

If the symbol layout (excluding numeric row) has at least the same
number of rows as the character layout, the numerical row is disabled
and numerical hinting is enabled, the symbol keys take precedence. The
numeric hints are instead added as additional popup characters.
2021-05-11 23:58:25 +02:00
Hayleia
df393ff607 composers can be specified in config.json
no compatibility with previous settings, need to update the regex
2021-05-11 19:03:30 +02:00
Hayleia
88a6f436ef Merge branch 'master' of https://github.com/florisboard/florisboard into composing1 2021-05-05 10:02:17 +02:00
ostrya
ee8f44d816 Use new currency set mechanism 2021-05-04 20:52:53 +02:00
ostrya
0308ec355f Adapt to new layout rework 2021-05-04 20:44:57 +02:00
Hayleia
3ac14f8a2a remove pointless reflection (going to use serialization anyways) 2021-05-04 20:16:23 +02:00
Hayleia
2b087b76dc korean double consonants and two vowels on shift key 2021-05-04 20:12:03 +02:00
Hayleia
1715e5ddfa support specifying uppercase and lowercase separately in json 2021-05-04 20:11:27 +02:00
Hayleia
6cc17161a5 factor stuff 2021-05-03 21:00:04 +02:00
Hayleia
5d1c20617b Merge branch 'master' of https://github.com/florisboard/florisboard into composing1 2021-05-03 19:22:23 +02:00
Hayleia
d9efa48c9c copy pasted code to compose text with suggestions enabled too 2021-05-03 19:15:03 +02:00
ostrya
dedd4cb7f0 Use custom modifier for symbol layer
To make the switch from character to symbol layer more consistent,
a neo specific symbol modifier layout was added. This also allows
overriding the comma and full stop with their layer 3 equivalents.
2021-05-02 17:06:07 +02:00
ostrya
42b147b656 Add neo/bone locale variant for better compatibility
The default de locale already defines a lot of extended popups which
do not match the Neo2 / Bone layout logic. Adding a locale variant
allows overriding those defaults.

As the Locale class does not support arbitrary country keys, the new
locale was chosen as a variant of de_DE with variant name "neobone".
There is no deep meaning in the name, it is only the concatenation of
neo and bone, and according to the Javadoc of Locale, a valid variant
must have either 5 to 8 characters or start with a number.
2021-05-02 17:06:06 +02:00
ostrya
47ce490d6c Initial attempt at Neo2 / Bone layout (#498)
* For now, only layers 1, 2 and 3 are supported.
* Layer 2 is reachable via caps, apart from number row, comma and full
  stop (which I think are easier to use if not affected by caps).
  Instead, the relevant characters are added as popups.
* Layer 3 is set up as a separate neo2 symbol / number row layer

The overall layout is kept as much as possible, with the following
exceptions:
* The number row contains only numbers and minus sign, while circumflex
  and grave accents are not included.
* To not overcrowd the layout and have the same number of keys for
  first and second row, the acute accent is not included as separate
  key but can be reached as additional popup to sharp s.
* Comma and full stop are not put between m and j (or z and k
  respectively), because the backspace takes up too much space for both
  keys to be put in this row.
* Also, having comma and full stop on the same height with the space
  key makes the layout more consistent with the existing layouts and
  the special usage as ~left and ~right keys.
2021-05-02 17:05:59 +02:00
Hayleia
5563a1cadd merge compatibility 2021-05-01 20:30:24 +02:00
Hayleia
7beb2e5ef6 Merge branch 'master' of https://github.com/florisboard/florisboard into composing1 2021-05-01 19:22:19 +02:00
Hayleia
f00da13cba less kotlin warnings and slightly more usable code
still hardcoded korean composer for all layouts
but at least it's not instantiated at every keypress
2021-05-01 09:34:40 +02:00
Hayleia
bfed1747f7 better korean jsons 2021-05-01 09:19:59 +02:00
Hayleia
abb4b104fa fix input being ignored sometimes? 2021-04-30 13:12:28 +02:00
Hayleia
b69b1caa72 Test Korean composition
currency is wrong
code is plugged at the wrong place
input is ignored sometimes
there is reflection for what seems to be no reason
I know, this is just a test and this will either be done again (properly) on another branch or discarded altogether
2021-04-30 07:31:32 +02:00
295 changed files with 53077 additions and 3761 deletions

View File

@@ -9,7 +9,7 @@ insert_final_newline = true
max_line_length = 120
trim_trailing_whitespace = true
[{*.har,*.json}]
[{*.har,*.json,*yml}]
indent_size = 2
[*.kt]

View File

@@ -16,6 +16,8 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: 1.8
- name: Setup CMake and Ninja
uses: lukka/get-cmake@v3.20.1
- uses: actions/cache@v2
with:
path: |
@@ -25,7 +27,7 @@ jobs:
restore-keys: |
${{ runner.os }}-gradle-
- name: Build with Gradle
run: ./gradlew clean assemble
run: ./gradlew clean assembleDebug
- uses: actions/upload-artifact@v2
with:
name: app-debug.apk

3
.gitignore vendored
View File

@@ -41,5 +41,8 @@ captures/
*.jks
crowdin.properties
# C++
.cxx/
# AndroidX Room schema JSONs
/app/schemas/

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "app/src/main/cpp/icu4c/android"]
path = app/src/main/cpp/icu4c/android
url = https://github.com/patrickgold/icu4c-android

View File

@@ -41,7 +41,7 @@ syntax (it is very easy though by just looking at some other layout files).
There are two main steps in adding new layouts, though the config step can
be skipped if you only add a layout without a new default language support.
### The config file (`app/src/main/assets/ime/config.json`)
### The config file ([`app/src/main/assets/ime/config.json`](app/src/main/assets/ime/config.json))
This file is very important, as it defines all default currency sets as
well as all default subtypes available in the Settings Subtype UI. Note
@@ -66,7 +66,7 @@ pre-configured language.
Since v0.3.10-beta05 it is possible to add custom layouts for all types.
To add a new layout, head to `app/src/main/assets/ime/text` and then select
To add a new layout, head to [`app/src/main/assets/ime/text`](app/src/main/assets/ime/text) and then select
the correct sub-directory for the type of layout you want to add. In most cases
this will be `characters` to add a layout like QWERTY etc.
@@ -74,14 +74,14 @@ For the `code` field of each key, make sure to use the UTF-8 code. A
useful tool for finding the correct code is [unicode-table.com](https://unicode-table.com/en/).
From there, you search for your letter and then use the HTML code, but without the `&#;`
For internal codes of functional or UI keys, see
`app/src/main/java/dev/patrickgold/florisboard/ime/text/key/KeyCode.kt`.
[`app/src/main/java/dev/patrickgold/florisboard/ime/text/key/KeyCode.kt`](app/src/main/java/dev/patrickgold/florisboard/ime/text/key/KeyCode.kt).
The label is equally important and should always match up with the defined
code. If `code` and `label` don't match up, FlorisBoard won't crash but
it will most likely lead to confusion in the key processing logic.
Any accents or diacritics that should be exposed via long press can be
added at `assets/ime/text/characters/extended_popups/<languageTag_name_here>.json`.
added at [`app/src/main/assets/ime/text/characters/extended_popups/<languageTag_name_here>.json`](app/src/main/assets/ime/text/characters/extended_popups).
For each key, you can add 1 main and several relevant accents. The main
accent should be used for accents which are important for the language
you add. The main field is used for determining if a hint or an accent

View File

@@ -45,7 +45,11 @@ _A. IzzySoft's repo for F-Droid_:
[<img src="https://gitlab.com/IzzyOnDroid/repo/-/raw/master/assets/IzzyOnDroid.png" height="64" alt="IzzySoft repo badge">](https://apt.izzysoft.de/fdroid/index/apk/dev.patrickgold.florisboard.beta)
_B. Use the APK provided in the release section of this repo_
_B. Google Play_:
Follow the same steps as for the stable track, the app can then be accessed [here](https://play.google.com/store/apps/details?id=dev.patrickgold.florisboard.beta).
_C. Use the APK provided in the release section of this repo_
### Giving feedback
If you want to give feedback to FlorisBoard, there are several ways to
@@ -96,6 +100,7 @@ milestones, please refer to the [Feature roadmap](#feature-roadmap).
* [x] User dictionary manager (system and internal)
### Other useful features
* [x] Support for Android 11+ inline autofill API
* [x] One-handed mode
* [x] Clipboard/cursor tools
* [x] Clipboard manager/history
@@ -198,21 +203,12 @@ to get more information on this topic.
[JakeWharton](https://github.com/JakeWharton)
* [expandable-fab](https://github.com/nambicompany/expandable-fab) by
[Nambi](https://github.com/nambicompany)
## Usage notes for included binary dictionary files
All binary dictionaries included within this project in
(this)[app/src/main/assets/ime/dict] asset folder are built from various
sources, as stated below.
### Source 1: [wordfreq library by LuminosoInsight](https://github.com/LuminosoInsight/wordfreq):
`wordfreq` is a repository which provides both a Python library and raw
data (the wordlists). Only the data has been extracted in order to build
binary dictionary files from it. `wordfreq`'s data is licensed under the
Creative Commons Attribution-ShareAlike 4.0 license
(https://creativecommons.org/licenses/by-sa/4.0/).
For further information on what wordfreq's data depends on, see
(https://github.com/LuminosoInsight/wordfreq#license).
* [ICU4C](https://github.com/unicode-org/icu) by
[The Unicode Consortium](https://github.com/unicode-org)
* [Nuspell](https://github.com/nuspell/nuspell) by
[Nuspell](https://github.com/nuspell)
* [TokyoCabinet (only used glob.h and glob.c)](https://github.com/white-gecko/TokyoCabinet) by
[Natanael Arndt](https://github.com/white-gecko)
## License
```

View File

@@ -1,14 +1,15 @@
plugins {
id("com.android.application") version "4.2.0"
kotlin("android") version "1.5.0"
kotlin("kapt") version "1.5.0"
kotlin("plugin.serialization") version "1.5.0"
id("com.android.application") version "4.2.1"
kotlin("android") version "1.5.20"
kotlin("kapt") version "1.5.20"
kotlin("plugin.serialization") version "1.5.20"
}
android {
compileSdkVersion(30)
buildToolsVersion("30.0.3")
ndkVersion = "22.1.7171670"
compileOptions {
sourceCompatibility = JavaVersion.VERSION_1_8
@@ -17,15 +18,15 @@ android {
kotlinOptions {
jvmTarget = JavaVersion.VERSION_1_8.toString()
freeCompilerArgs = listOf("-Xallow-result-return-type", "-Xopt-in=kotlin.RequiresOptIn")
freeCompilerArgs = listOf("-Xallow-result-return-type", "-Xopt-in=kotlin.RequiresOptIn", "-Xopt-in=kotlin.contracts.ExperimentalContracts")
}
defaultConfig {
applicationId = "dev.patrickgold.florisboard"
minSdkVersion(23)
targetSdkVersion(30)
versionCode(43)
versionName("0.3.12")
versionCode(49)
versionName("0.3.13")
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
@@ -38,17 +39,47 @@ android {
)
}
}
externalNativeBuild {
cmake {
cFlags("-fvisibility=hidden", "-DU_STATIC_IMPLEMENTATION=1")
cppFlags("-fvisibility=hidden", "-std=c++17", "-fexceptions", "-ffunction-sections", "-fdata-sections", "-DU_DISABLE_RENAMING=1", "-DU_STATIC_IMPLEMENTATION=1")
arguments("-DANDROID_STL=c++_static")
}
}
ndk {
//abiFilters += listOf("x86", "x86_64", "armeabi-v7a", "arm64-v8a")
abiFilters += listOf("armeabi-v7a", "arm64-v8a")
}
sourceSets {
maybeCreate("main").apply {
jni {
srcDirs("src/main/jniLibs")
}
}
}
}
buildFeatures {
viewBinding = true
}
externalNativeBuild {
cmake {
path("src/main/cpp/CMakeLists.txt")
}
}
buildTypes {
named("debug").configure {
applicationIdSuffix = ".debug"
versionNameSuffix = "-debug"
isDebuggable = true
isJniDebuggable = true
resValue("mipmap", "floris_app_icon", "@mipmap/ic_app_icon_debug")
resValue("mipmap", "floris_app_icon_round", "@mipmap/ic_app_icon_debug_round")
resValue("string", "floris_app_name", "FlorisBoard Debug")
@@ -89,6 +120,7 @@ android {
dependencies {
implementation("androidx.activity", "activity-ktx", "1.2.1")
implementation("androidx.appcompat", "appcompat", "1.2.0")
implementation("androidx.autofill", "autofill", "1.1.0")
implementation("androidx.core", "core-ktx", "1.3.2")
implementation("androidx.fragment", "fragment-ktx", "1.3.0")
implementation("androidx.preference", "preference-ktx", "1.1.1")
@@ -104,9 +136,11 @@ dependencies {
implementation("androidx.room", "room-runtime", "2.2.6")
kapt("androidx.room", "room-compiler","2.2.6")
testImplementation("junit", "junit", "4.13.1")
testImplementation(kotlin("test"))
testImplementation("androidx.test", "core", "1.3.0")
testImplementation("org.mockito", "mockito-inline", "3.7.7")
testImplementation("org.robolectric", "robolectric", "4.5.1")
androidTestImplementation("androidx.test.ext", "junit", "1.1.2")
androidTestImplementation("androidx.test.espresso", "espresso-core", "3.3.0")
}

View File

@@ -21,7 +21,7 @@
<uses-permission android:name="android.permission.VIBRATE"/>
<application
android:name=".ime.core.FlorisApplication"
android:name="dev.patrickgold.florisboard.FlorisApplication"
android:allowBackup="false"
android:icon="@mipmap/floris_app_icon"
android:label="@string/floris_app_name"
@@ -31,16 +31,25 @@
<!-- IME service -->
<service
android:name="dev.patrickgold.florisboard.ime.core.FlorisBoard"
android:name="dev.patrickgold.florisboard.FlorisImeService"
android:label="@string/floris_app_name"
android:permission="android.permission.BIND_INPUT_METHOD">
<meta-data
android:name="android.view.im"
android:resource="@xml/method"/>
android:permission="android.permission.BIND_INPUT_METHOD"
android:directBootAware="true">
<intent-filter>
<action android:name="android.view.InputMethod"/>
</intent-filter>
<meta-data android:name="android.view.im" android:resource="@xml/method"/>
</service>
<!-- Spellchecker service -->
<service
android:name="dev.patrickgold.florisboard.FlorisSpellCheckerService"
android:label="@string/floris_app_name"
android:permission="android.permission.BIND_TEXT_SERVICE">
<intent-filter>
<action android:name="android.service.textservice.SpellCheckerService"/>
</intent-filter>
<meta-data android:name="android.view.textservice.scs" android:resource="@xml/spellchecker"/>
</service>
<!-- Settings Activity -->
@@ -87,6 +96,14 @@
android:label="@string/settings__theme_editor__title"
android:theme="@style/SettingsTheme"/>
<!-- Spelling Activity -->
<activity
android:name="dev.patrickgold.florisboard.settings.spelling.SpellingActivity"
android:icon="@mipmap/floris_app_icon"
android:label="@string/settings__spelling__title_overview"
android:roundIcon="@mipmap/floris_app_icon_round"
android:theme="@style/SettingsTheme"/>
<!-- About Activity -->
<activity
android:name="dev.patrickgold.florisboard.settings.AboutActivity"

View File

@@ -1,5 +1,9 @@
{
"package": "dev.patrickgold.florisboard",
"composers": [
{ "$": "appender" },
{ "$": "hangul-unicode" }
],
"currencySets": [
{
"name": "azerbaijani_manat",
@@ -246,6 +250,7 @@
{
"id": 101,
"languageTag": "en-US",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "qwerty"
@@ -254,6 +259,7 @@
{
"id": 102,
"languageTag": "en-UK",
"composer": "appender",
"currencySet": "pound",
"preferred": {
"characters": "qwerty"
@@ -262,6 +268,7 @@
{
"id": 103,
"languageTag": "en-CA",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "qwerty"
@@ -270,6 +277,7 @@
{
"id": 104,
"languageTag": "en-AU",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "qwerty"
@@ -278,6 +286,7 @@
{
"id": 201,
"languageTag": "de-DE",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwertz"
@@ -286,6 +295,7 @@
{
"id": 202,
"languageTag": "de-AT",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwertz"
@@ -294,14 +304,27 @@
{
"id": 203,
"languageTag": "de-CH",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "swiss_german"
}
},
{
"id": 204,
"languageTag": "de-DE-neobone",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "neo2",
"symbols": "neo2",
"numericRow": "neo2"
}
},
{
"id": 301,
"languageTag": "fr-FR",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "azerty"
@@ -310,6 +333,7 @@
{
"id": 302,
"languageTag": "fr-CA",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "canadian_french"
@@ -318,6 +342,7 @@
{
"id": 303,
"languageTag": "fr-CH",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "swiss_french"
@@ -326,6 +351,7 @@
{
"id": 401,
"languageTag": "it-IT",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwerty"
@@ -334,6 +360,7 @@
{
"id": 402,
"languageTag": "it-CH",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "swiss_italian"
@@ -342,6 +369,7 @@
{
"id": 501,
"languageTag": "es-ES",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "spanish"
@@ -350,6 +378,7 @@
{
"id": 502,
"languageTag": "es-US",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "spanish"
@@ -358,6 +387,7 @@
{
"id": 503,
"languageTag": "es-419",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "spanish"
@@ -366,6 +396,7 @@
{
"id": 601,
"languageTag": "pt-PT",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwerty"
@@ -374,6 +405,7 @@
{
"id": 602,
"languageTag": "pt-BR",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "qwerty"
@@ -382,6 +414,7 @@
{
"id": 701,
"languageTag": "nb-NO",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "norwegian"
@@ -390,6 +423,7 @@
{
"id": 702,
"languageTag": "nn-NO",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "norwegian"
@@ -398,6 +432,7 @@
{
"id": 711,
"languageTag": "sv-SE",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "swedish_finnish"
@@ -406,6 +441,7 @@
{
"id": 721,
"languageTag": "fi-FI",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "swedish_finnish"
@@ -414,6 +450,7 @@
{
"id": 731,
"languageTag": "da-DK",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "danish"
@@ -422,6 +459,7 @@
{
"id": 741,
"languageTag": "is-IS",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "icelandic"
@@ -430,6 +468,7 @@
{
"id": 751,
"languageTag": "fo",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "faroese"
@@ -438,6 +477,7 @@
{
"id": 801,
"languageTag": "fa-FA",
"composer": "appender",
"currencySet": "iranian_rial",
"preferred": {
"characters": "persian",
@@ -449,6 +489,7 @@
{
"id": 901,
"languageTag": "ar",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "arabic",
@@ -460,6 +501,7 @@
{
"id": 1001,
"languageTag": "hu",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "hungarian"
@@ -468,6 +510,7 @@
{
"id": 1101,
"languageTag": "eo",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "esperanto"
@@ -476,6 +519,7 @@
{
"id": 1201,
"languageTag": "hr",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwertz"
@@ -484,6 +528,7 @@
{
"id": 1301,
"languageTag": "ru",
"composer": "appender",
"currencySet": "russian_ruble",
"preferred": {
"characters": "jcuken_russian"
@@ -492,6 +537,7 @@
{
"id": 1351,
"languageTag": "uk",
"composer": "appender",
"currencySet": "ukrainian_hryvnia",
"preferred": {
"characters": "jcuken_ukrainian"
@@ -500,6 +546,7 @@
{
"id": 1401,
"languageTag": "el",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "greek"
@@ -508,6 +555,7 @@
{
"id": 1501,
"languageTag": "ro",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwerty"
@@ -516,6 +564,7 @@
{
"id": 1601,
"languageTag": "pl",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwerty"
@@ -524,6 +573,7 @@
{
"id": 1701,
"languageTag": "bg-bg",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "bulgarian_phonetic"
@@ -532,6 +582,7 @@
{
"id": 1801,
"languageTag": "tr",
"composer": "appender",
"currencySet": "turkish_lira",
"preferred": {
"characters": "qwerty"
@@ -540,6 +591,7 @@
{
"id": 1901,
"languageTag": "iw-IL",
"composer": "appender",
"currencySet": "israeli_new_shekel",
"preferred": {
"characters": "hebrew"
@@ -548,6 +600,7 @@
{
"id": 2001,
"languageTag": "ckb",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "kurdish",
@@ -559,6 +612,7 @@
{
"id": 2101,
"languageTag": "sr-RS",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "serbian_cyrillic"
@@ -567,6 +621,7 @@
{
"id": 2201,
"languageTag": "lv-LV",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwerty"
@@ -575,6 +630,7 @@
{
"id": 2301,
"languageTag": "ku",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "kurdish_kurmanci"
@@ -583,6 +639,7 @@
{
"id": 2501,
"languageTag": "ca",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "catalan"
@@ -591,6 +648,7 @@
{
"id": 2601,
"languageTag": "IPA-IPA",
"composer": "appender",
"currencySet": "dollar",
"preferred": {
"characters": "ipa",
@@ -601,6 +659,7 @@
{
"id": 2701,
"languageTag": "sk",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwertz"
@@ -609,10 +668,20 @@
{
"id": 2801,
"languageTag": "cs",
"composer": "appender",
"currencySet": "euro",
"preferred": {
"characters": "qwertz"
}
},
{
"id": 2900,
"languageTag": "ko",
"composer": "hangul-unicode",
"currencySet": "south_korean_won",
"preferred": {
"characters": "korean"
}
}
]
}

View File

@@ -0,0 +1,69 @@
{
"basePath": "ime/spelling",
"importSources": [
{
"id": "mozilla_firefox",
"label": "Mozilla Firefox Add-ons",
"url": "https://addons.mozilla.org/firefox/language-tools/",
"format": {
"$": "archive",
"file": {
"name": "^.+\\.xpi$",
"isRequired": true
}
}
},
{
"id": "libre_office",
"label": "LibreOffice [CURRENTLY UNSUPPORTED]",
"url": "https://extensions.libreoffice.org/?Tags%5B%5D=50",
"format": {
"$": "archive",
"file": {
"name": "^.+\\.oxt$",
"isRequired": true
}
}
},
{
"id": "open_office",
"label": "Apache OpenOffice [CURRENTLY UNSUPPORTED]",
"url": "https://extensions.openoffice.org/en/search?f%5B0%5D=field_project_tags%3A157",
"format": {
"$": "archive",
"file": {
"name": "^.+\\.oxt$",
"isRequired": true
}
}
},
{
"id": "free_office",
"label": "SoftMaker FreeOffice",
"url": "https://www.freeoffice.com/en/download/dictionaries",
"format": {
"$": "archive",
"file": {
"name": "^.+\\.sox$",
"isRequired": true
}
}
},
{
"id": "gh_wooorm",
"label": "GitHub collection by Titus Wormer",
"url": "https://github.com/wooorm/dictionaries",
"format": {
"$": "raw",
"affFile": {
"name": "^.+\\.aff$",
"isRequired": true
},
"dicFile": {
"name": "^.+\\.dic$",
"isRequired": true
}
}
}
]
}

View File

@@ -0,0 +1,61 @@
{
"type": "characters",
"name": "bone",
"label": "Bone",
"authors": [ "ostrya" ],
"direction": "ltr",
"modifier": "neo2",
"arrangement": [
[
{ "$": "auto_text_key", "code": 106, "label": "j" },
{ "$": "auto_text_key", "code": 100, "label": "d" },
{ "$": "auto_text_key", "code": 117, "label": "u" },
{ "$": "auto_text_key", "code": 97, "label": "a" },
{ "$": "auto_text_key", "code": 120, "label": "x" },
{ "$": "auto_text_key", "code": 112, "label": "p" },
{ "$": "auto_text_key", "code": 104, "label": "h" },
{ "$": "auto_text_key", "code": 108, "label": "l" },
{ "$": "auto_text_key", "code": 109, "label": "m" },
{ "$": "auto_text_key", "code": 119, "label": "w" },
{ "$": "case_selector",
"lower": {
"code": 223, "label": "ß", "popup": {
"relevant": [
{ "code": 180, "label": "´" }
]
}
},
"upper": {
"code": 7838, "label": "ẞ", "popup": {
"relevant": [
{ "code": 180, "label": "´" }
]
}
}
}
],
[
{ "$": "auto_text_key", "code": 99, "label": "c" },
{ "$": "auto_text_key", "code": 116, "label": "t" },
{ "$": "auto_text_key", "code": 105, "label": "i" },
{ "$": "auto_text_key", "code": 101, "label": "e" },
{ "$": "auto_text_key", "code": 111, "label": "o" },
{ "$": "auto_text_key", "code": 98, "label": "b" },
{ "$": "auto_text_key", "code": 110, "label": "n" },
{ "$": "auto_text_key", "code": 114, "label": "r" },
{ "$": "auto_text_key", "code": 115, "label": "s" },
{ "$": "auto_text_key", "code": 103, "label": "g" },
{ "$": "auto_text_key", "code": 113, "label": "q" }
],
[
{ "$": "auto_text_key", "code": 102, "label": "f" },
{ "$": "auto_text_key", "code": 118, "label": "v" },
{ "$": "auto_text_key", "code": 252, "label": "ü" },
{ "$": "auto_text_key", "code": 228, "label": "ä" },
{ "$": "auto_text_key", "code": 246, "label": "ö" },
{ "$": "auto_text_key", "code": 121, "label": "y" },
{ "$": "auto_text_key", "code": 122, "label": "z" },
{ "$": "auto_text_key", "code": 107, "label": "k" }
]
]
}

View File

@@ -4,28 +4,22 @@
"authors": [ "GoRaN" ],
"mapping": {
"all": {
"": {
"relevant": [
{ "code": 1577, "label": "ة" },
{ "code": 1729, "label": "ـہ" }
]
},
"ر": {
"relevant": [
{ "code": 1685, "label": "ڕ" },
{ "code": 1682, "label": "ڒ" }
]
},
"ی": {
"relevant": [
{ "code": 1746, "label": "ے" },
{ "code": 1610, "label": "ي" },
{ "code": 1744, "label": "ې" },
{ "code": 1741, "label": "ۍ" },
{ "code": 1742, "label": "ێ" },
{ "code": 1744, "label": "ې" },
{ "code": 1610, "label": "ي" },
{ "code": 1597, "label": "ؽ" }
]
},
@@ -34,10 +28,15 @@
"ﺋ": {
"relevant": [
{ "code": 65163, "label": "ﺋ" },
{ "code": 1569, "label": "ء" },
{ "code": 65139, "label": "ﹳ" }
]
},
"ح": {
"relevant": [
{ "code": 65010, "label": "ﷲ" },
{ "code": 65019, "label": "ﷻ" }
]
},
"ع": {
"relevant": [
@@ -56,12 +55,9 @@
]
},
"ف": {
"relevant": [
{ "code": 1701, "label": "ڥ" },
{ "code": 1700, "label": "ڤ" },
{ "code": 1698, "label": "ڢ" },
{ "code": 1697, "label": "ڡ" }
]
@@ -70,7 +66,6 @@
"د": {
"relevant": [
{ "code": 1676, "label": "ڌ" },
{ "code": 1584, "label": "ذ" },
{ "code": 64390, "label": "ﮆ" },
{ "code": 1774, "label": "ۮ" }
]
@@ -93,9 +88,7 @@
},
"ب": {
"relevant": [
{ "code": 65010, "label": "" },
{ "code": 65021, "label": "﷽" },
{ "code": 65019, "label": "ﷻ" }
{ "code": 65021, "label": "﷽" }
]
},
"م": {
@@ -108,7 +101,6 @@
"relevant": [
{ "code": 1718, "label": "ڶ" },
{ "code": 1719, "label": "ڷ" },
{ "code": 1717, "label": "ڵ" },
{ "code": 1720, "label": "ڸ" }
]
},

View File

@@ -0,0 +1,19 @@
{
"type": "characters/extended_popups",
"name": "de-DE-neobone",
"authors": [ "ostrya" ],
"mapping": {
"uri": {
"~right": {
"main": { "code": -255, "label": ".com" },
"relevant": [
{ "code": -255, "label": ".ch" },
{ "code": -255, "label": ".de" },
{ "code": -255, "label": ".at" },
{ "code": -255, "label": ".net" }
]
}
}
}
}

View File

@@ -20,10 +20,10 @@
]
},
"ι": {
"main": { "$": "auto_text_key", "code": 943, "label": "ί" },
"relevant": [
{ "$": "auto_text_key", "code": 912, "label": "ΐ" },
{ "$": "auto_text_key", "code": 970, "label": "ϊ" },
{ "$": "auto_text_key", "code": 943, "label": "ί" }
{ "$": "auto_text_key", "code": 970, "label": "ϊ" }
]
},
"ο": {
@@ -32,10 +32,10 @@
]
},
"υ": {
"main": { "$": "auto_text_key", "code": 973, "label": "ύ" },
"relevant": [
{ "$": "auto_text_key", "code": 944, "label": "ΰ" },
{ "$": "auto_text_key", "code": 971, "label": "ϋ" },
{ "$": "auto_text_key", "code": 973, "label": "ύ" }
{ "$": "auto_text_key", "code": 971, "label": "ϋ" }
]
},
"ω": {

View File

@@ -0,0 +1,75 @@
{
"type": "characters/extended_popups",
"name": "ko",
"authors": [ "patrickgold", "Hayleia" ],
"mapping": {
"all": {
"ㅂ": {
"relevant": [
{ "$": "auto_text_key", "code": 12611, "label": "ㅃ" }
]
},
"ㅈ": {
"relevant": [
{ "$": "auto_text_key", "code": 12617, "label": "ㅉ" }
]
},
"ㄷ": {
"relevant": [
{ "$": "auto_text_key", "code": 12600, "label": "ㄸ" }
]
},
"ㄱ": {
"relevant": [
{ "$": "auto_text_key", "code": 12594, "label": "ㄲ" }
]
},
"ㅅ": {
"relevant": [
{ "$": "auto_text_key", "code": 12614, "label": "ㅆ" }
]
},
"ㅐ": {
"relevant": [
{ "$": "auto_text_key", "code": 12626, "label": "ㅒ" }
]
},
"ㅔ": {
"relevant": [
{ "$": "auto_text_key", "code": 12630, "label": "ㅖ" }
]
},
"~right": {
"main": { "code": 44, "label": "," },
"relevant": [
{ "code": 38, "label": "&" },
{ "code": 37, "label": "%" },
{ "code": 43, "label": "+" },
{ "code": 34, "label": "\"" },
{ "code": 45, "label": "-" },
{ "code": 58, "label": ":" },
{ "code": 39, "label": "'" },
{ "code": 64, "label": "@" },
{ "code": 59, "label": ";" },
{ "code": 47, "label": "/" },
{ "code": 40, "label": "(" },
{ "code": 41, "label": ")" },
{ "code": 35, "label": "#" },
{ "code": 33, "label": "!" },
{ "code": 63, "label": "?" }
]
}
},
"uri": {
"~right": {
"main": { "code": -255, "label": ".com" },
"relevant": [
{ "code": -255, "label": ".gov" },
{ "code": -255, "label": ".edu" },
{ "code": -255, "label": ".org" },
{ "code": -255, "label": ".net" }
]
}
}
}
}

View File

@@ -0,0 +1,77 @@
{
"type": "characters",
"name": "halmak",
"label": "Halmak",
"authors": [ "dessalines" ],
"direction": "ltr",
"arrangement": [
[
{ "$": "auto_text_key", "code": 119, "label": "w" },
{ "$": "auto_text_key", "code": 108, "label": "l" },
{ "$": "auto_text_key", "code": 114, "label": "r" },
{ "$": "auto_text_key", "code": 98, "label": "b" },
{ "$": "auto_text_key", "code": 122, "label": "z" },
{ "$": "case_selector",
"lower": { "code": 59, "label": ";", "popup": {
"relevant": [
{ "code": 58, "label": ":" }
]
} },
"upper": { "code": 58, "label": ":", "popup": {
"relevant": [
{ "code": 59, "label": ";" }
]
} }
},
{ "$": "auto_text_key", "code": 113, "label": "q" },
{ "$": "auto_text_key", "code": 117, "label": "u" },
{ "$": "auto_text_key", "code": 100, "label": "d" },
{ "$": "auto_text_key", "code": 106, "label": "j" }
],
[
{ "$": "auto_text_key", "code": 115, "label": "s" },
{ "$": "auto_text_key", "code": 104, "label": "h" },
{ "$": "auto_text_key", "code": 110, "label": "n" },
{ "$": "auto_text_key", "code": 116, "label": "t" },
{ "$": "case_selector",
"lower": { "code": 44, "label": ",", "popup": {
"relevant": [
{ "code": 40, "label": "(" }
]
} },
"upper": { "code": 40, "label": "(", "popup": {
"relevant": [
{ "code": 44, "label": "," }
]
} }
},
{ "$": "case_selector",
"lower": { "code": 46, "label": ".", "popup": {
"relevant": [
{ "code": 41, "label": ")" }
]
} },
"upper": { "code": 41, "label": ")", "popup": {
"relevant": [
{ "code": 46, "label": "." }
]
} }
},
{ "$": "auto_text_key", "code": 97, "label": "a" },
{ "$": "auto_text_key", "code": 101, "label": "e" },
{ "$": "auto_text_key", "code": 111, "label": "o" },
{ "$": "auto_text_key", "code": 105, "label": "i" }
],
[
{ "$": "auto_text_key", "code": 102, "label": "f" },
{ "$": "auto_text_key", "code": 109, "label": "m" },
{ "$": "auto_text_key", "code": 118, "label": "v" },
{ "$": "auto_text_key", "code": 99, "label": "c" },
{ "$": "auto_text_key", "code": 103, "label": "g" },
{ "$": "auto_text_key", "code": 112, "label": "p" },
{ "$": "auto_text_key", "code": 120, "label": "x" },
{ "$": "auto_text_key", "code": 107, "label": "k" },
{ "$": "auto_text_key", "code": 121, "label": "y" }
]
]
}

View File

@@ -28,27 +28,27 @@
{ "code": 1508, "label": "פ" }
],
[
{ "code": 1513, "label": "ף" },
{ "code": 1491, "label": "ך" },
{ "code": 1490, "label": "ל" },
{ "code": 1499, "label": "ח" },
{ "code": 1506, "label": "י" },
{ "code": 1497, "label": "ע" },
{ "code": 1495, "label": "כ" },
{ "code": 1500, "label": "ג" },
{ "code": 1498, "label": "ד" },
{ "code": 1507, "label": "ש" }
{ "code": 1513, "label": "ש" },
{ "code": 1491, "label": "ד" },
{ "code": 1490, "label": "ג" },
{ "code": 1499, "label": "כ" },
{ "code": 1506, "label": "ע" },
{ "code": 1497, "label": "י" },
{ "code": 1495, "label": "ח" },
{ "code": 1500, "label": "ל" },
{ "code": 1498, "label": "ך" },
{ "code": 1507, "label": "ף" }
],
[
{ "code": 1494, "label": "ץ" },
{ "code": 1505, "label": "ת" },
{ "code": 1489, "label": "צ" },
{ "code": 1492, "label": "מ" },
{ "code": 1494, "label": "ז" },
{ "code": 1505, "label": "ס" },
{ "code": 1489, "label": "ב" },
{ "code": 1492, "label": "ה" },
{ "code": 1504, "label": "נ" },
{ "code": 1502, "label": "ה" },
{ "code": 1510, "label": "ב" },
{ "code": 1514, "label": "ס" },
{ "code": 1509, "label": "ז" }
{ "code": 1502, "label": "מ" },
{ "code": 1510, "label": "צ" },
{ "code": 1514, "label": "ת" },
{ "code": 1509, "label": "ץ" }
]
]
}

View File

@@ -0,0 +1,62 @@
{
"type": "characters",
"name": "korean",
"label": "South Korean standard",
"authors": [ "patrickgold", "Hayleia" ],
"direction": "ltr",
"arrangement": [
[
{ "$": "case_selector",
"lower": { "code": 12610, "label": "ㅂ" },
"upper": { "code": 12611, "label": "ㅃ" }
},
{ "$": "case_selector",
"lower": { "code": 12616, "label": "ㅈ" },
"upper": { "code": 12617, "label": "ㅉ" }
},
{ "$": "case_selector",
"lower": { "code": 12599, "label": "ㄷ" },
"upper": { "code": 12600, "label": "ㄸ" }
},
{ "$": "case_selector",
"lower": { "code": 12593, "label": "ㄱ" },
"upper": { "code": 12594, "label": "ㄲ" }
},
{ "$": "case_selector",
"lower": { "code": 12613, "label": "ㅅ" },
"upper": { "code": 12614, "label": "ㅆ" }
},
{ "$": "auto_text_key", "code": 12635, "label": "ㅛ"},
{ "$": "auto_text_key", "code": 12629, "label": "ㅕ"},
{ "$": "auto_text_key", "code": 12625, "label": "ㅑ"},
{ "$": "case_selector",
"lower": { "code": 12624, "label": "ㅐ" },
"upper": { "code": 12626, "label": "ㅒ" }
},
{ "$": "case_selector",
"lower": { "code": 12628, "label": "ㅔ" },
"upper": { "code": 12630, "label": "ㅖ" }
}
],
[
{ "$": "auto_text_key", "code": 12609, "label": "ㅁ"},
{ "$": "auto_text_key", "code": 12596, "label": "ㄴ"},
{ "$": "auto_text_key", "code": 12615, "label": "ㅇ"},
{ "$": "auto_text_key", "code": 12601, "label": "ㄹ"},
{ "$": "auto_text_key", "code": 12622, "label": "ㅎ"},
{ "$": "auto_text_key", "code": 12631, "label": "ㅗ"},
{ "$": "auto_text_key", "code": 12627, "label": "ㅓ"},
{ "$": "auto_text_key", "code": 12623, "label": "ㅏ"},
{ "$": "auto_text_key", "code": 12643, "label": "ㅣ"}
],
[
{ "$": "auto_text_key", "code": 12619, "label": "ㅋ"},
{ "$": "auto_text_key", "code": 12620, "label": "ㅌ"},
{ "$": "auto_text_key", "code": 12618, "label": "ㅊ"},
{ "$": "auto_text_key", "code": 12621, "label": "ㅍ"},
{ "$": "auto_text_key", "code": 12640, "label": "ㅠ"},
{ "$": "auto_text_key", "code": 12636, "label": "ㅜ"},
{ "$": "auto_text_key", "code": 12641, "label": "ㅡ"}
]
]
}

View File

@@ -1,7 +1,7 @@
{
"type": "characters",
"name": "kurdish",
"label": "کوردی",
"label": "کوردی (قوەرتی نوێ)",
"authors": [ "GoRaN" ],
"direction": "rtl",
"modifier": "kurdish",
@@ -13,34 +13,46 @@
{ "code": 1608, "label": "و", "popup": {
"main": { "code": -255, "label": "وو" }
} },
{ "code": 1749, "label": "" },
{ "code": 1585, "label": "ر" },
{ "code": 1749, "label": "", "popup": {
"main": { "code": 1577, "label": "ة" }
} },
{ "code": 1585, "label": "ر", "popup": {
"main": { "code": 1685, "label": "ڕ" }
} },
{ "code": 1578, "label": "ت", "popup": {
"main": { "code": 1591, "label": "ط" }
} },
{ "code": 1740, "label": "ی" },
{ "code": 1574, "label": "ﺋ"},
{ "code": 1740, "label": "ی", "popup": {
"main": { "code": 1742, "label": "ێ" }
} },
{ "code": 1574, "label": "ﺋ", "popup": {
"main": { "code": 1569, "label": "ء" }
} },
{ "code": 1593, "label": "ع", "popup": {
"main": { "code": 1594, "label": "غ" }
} },
{ "code": 1734, "label": "ۆ" },
{ "code": 1662, "label": "پ", "popup": {
"main": { "code": 1579, "label": "ث" }
} }
],
[
{ "code": 1575, "label": "ا" },
{"code": 1575, "label": "ا"},
{ "code": 1587, "label": "س" },
{ "code": 1588, "label": "ش" },
{ "code": 1583, "label": "د" },
{ "code": 1601, "label": "ف" },
{ "code": 1583, "label": "د", "popup": {
"main": {"code": 1584, "label": "ذ" }
} },
{ "code": 1601, "label": "ف" , "popup": {
"main": {"code": 1700, "label": "ڤ" }
} },
{ "code": 1607, "label": "ھ" },
{ "code": 1688, "label": "ژ" },
{ "code": 1604, "label": "ل" },
{ "code": 1688, "label": "ژ", "popup": {
"main": { "code": 1600, "label": "" }
} },
{ "code": 1604, "label": "ل", "popup": {
"main": { "code": 1717, "label": "ڵ" }
} },
{ "code": 1705, "label": "ک" },
{ "code": 1711, "label": "گ" }
],

View File

@@ -13,7 +13,7 @@
{ "$": "auto_text_key", "code": 116, "label": "t" },
{ "$": "auto_text_key", "code": 121, "label": "y" },
{ "$": "auto_text_key", "code": 117, "label": "u" },
{ "$": "auto_text_key", "code": 305, "label": "ı" },
{ "$": "auto_text_key", "code": 105, "label": "i" },
{ "$": "auto_text_key", "code": 111, "label": "o" },
{ "$": "auto_text_key", "code": 112, "label": "p" },
{ "$": "auto_text_key", "code": 251, "label": "û" }

View File

@@ -1,7 +1,7 @@
{
"type": "characters",
"name": "kurdish_standard",
"label": "کوردی - ستاندارد",
"label": "کوردی (ق‌ڤ‌ف‌غ)",
"authors": [ "GoRaN" ],
"direction": "rtl",
"modifier": "kurdish",
@@ -10,16 +10,14 @@
{ "code": 1602, "label": "ق", "popup": {
"main": { "code": 1647, "label": "ٯ" }
} },
{ "code": 1700, "label": "ڤ", "popup": {
"main": { "code": 1701, "label": "ڥ" }
} },
{ "code": 1601, "label": "ف", "popup": {
"main": { "code": 1698, "label": "ڢ" }
} },
{ "code": 1700, "label": "ڤ" },
{ "code": 1601, "label": "ف" },
{ "code": 1594, "label": "غ" },
{ "code": 1593, "label": "ع"},
{ "code": 1607, "label": "ھ" },
{ "code": 1749, "label": "" },
{ "code": 1749, "label": "", "popup": {
"main": { "code": 1577, "label": "ة" }
} },
{ "code": 1578, "label": "ت", "popup": {
"main": { "code": 1591, "label": "ط" }
@@ -46,7 +44,9 @@
} },
{ "code": 1585, "label": "ر" },
{ "code": 1685, "label": "ڕ" },
{ "code": 1583, "label": "د" },
{ "code": 1583, "label": "د", "popup": {
"main": {"code": 1584, "label": "ذ" }
} },
{ "code": -255, "label": "وو" },
{ "code": 1608, "label": "و" },
{ "code": 1734, "label": "ۆ" },
@@ -55,8 +55,10 @@
],
[
{ "code": 1600, "label": "kashida", "variation": "normal" },
{ "code": 1574, "label": "ﺋ"},
{ "code": 1574, "label": "ﺋ", "popup": {
"main": { "code": 1569, "label": "ء" }
} },
{ "code": 1662, "label": "پ", "popup": {
"main": { "code": 1579, "label": "ث" }

View File

@@ -6,14 +6,17 @@
"direction": "rtl",
"arrangement": [
[
{ "code": 1600, "label": "kashida", "popup":
{ "main": { "code": 8204, "label": "half_space" }
} },
{ "code": 0, "type": "placeholder" },
{ "code": -5, "label": "delete", "type": "enter_editing" }
],
[
{ "code": -202, "label": "view_symbols", "type": "system_gui" },
{ "$": "variation_selector",
"default": { "code": 1567, "label": "؟", "groupId": 1 },
"password": { "code": 1548, "label": "،", "groupId": 1 },
"default": { "code": 1548, "label": "،", "groupId": 1 },
"password": { "code": 35, "label": "#", "groupId": 1 },
"email": { "code": 64, "label": "@", "groupId": 1 },
"uri": { "code": 47, "label": "/", "groupId": 1 }
},

View File

@@ -0,0 +1,53 @@
{
"type": "characters/mod",
"name": "neo2",
"label": "Neo2",
"authors": [ "ostrya" ],
"direction": "ltr",
"arrangement": [
[
{ "code": -1, "label": "shift", "type": "modifier" },
{ "code": 0, "type": "placeholder" },
{ "code": -5, "label": "delete", "type": "enter_editing" }
],
[
{ "code": -202, "label": "view_symbols", "type": "system_gui" },
{ "code": -210, "label": "language_switch", "type": "system_gui" },
{ "code": -213, "label": "switch_to_media_context", "type": "system_gui" },
{ "code": 32, "label": "space" },
{ "$": "variation_selector",
"default": { "code": 44, "label": ",", "groupId": 1,
"popup": {
"main": { "code": 34, "label": "\"" },
"relevant": [
{ "code": 8211, "label": "–" }
]
} },
"email": { "code": 64, "label": "@", "groupId": 1,
"popup": {
"relevant": [
{ "code": 44, "label": "," }
]
} },
"uri": { "code": 47, "label": "/", "groupId": 1,
"popup": {
"relevant": [
{ "code": 44, "label": "," }
]
} }
},
{ "$": "variation_selector",
"default": { "code": 46, "label": ".", "groupId": 2,
"popup": {
"relevant": [
{ "code": 183, "label": "·" },
{ "code": 39, "label": "'" }
]
} },
"email": { "code": 46, "label": ".", "groupId": 2 },
"uri": { "code": 46, "label": ".", "groupId": 2 }
},
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
]
]
}

View File

@@ -0,0 +1,41 @@
{
"type": "characters",
"name": "nalmy",
"label": "NALMY",
"authors": [ "jeremiah-miller", "jasmcole" ],
"direction": "ltr",
"arrangement": [
[
{ "$": "auto_text_key", "code": 122, "label": "z" },
{ "$": "auto_text_key", "code": 118, "label": "v" },
{ "$": "auto_text_key", "code": 100, "label": "d" },
{ "$": "auto_text_key", "code": 110, "label": "n" },
{ "$": "auto_text_key", "code": 97, "label": "a" },
{ "$": "auto_text_key", "code": 108, "label": "l" },
{ "$": "auto_text_key", "code": 109, "label": "m" },
{ "$": "auto_text_key", "code": 121, "label": "y" },
{ "$": "auto_text_key", "code": 120, "label": "x" },
{ "$": "auto_text_key", "code": 106, "label": "j" }
],
[
{ "$": "auto_text_key", "code": 107, "label": "k" },
{ "$": "auto_text_key", "code": 103, "label": "g" },
{ "$": "auto_text_key", "code": 105, "label": "i" },
{ "$": "auto_text_key", "code": 101, "label": "e" },
{ "$": "auto_text_key", "code": 114, "label": "r" },
{ "$": "auto_text_key", "code": 111, "label": "o" },
{ "$": "auto_text_key", "code": 112, "label": "p" },
{ "$": "auto_text_key", "code": 98, "label": "b" },
{ "$": "auto_text_key", "code": 113, "label": "q" }
],
[
{ "$": "auto_text_key", "code": 119, "label": "w" },
{ "$": "auto_text_key", "code": 104, "label": "h" },
{ "$": "auto_text_key", "code": 116, "label": "t" },
{ "$": "auto_text_key", "code": 115, "label": "s" },
{ "$": "auto_text_key", "code": 99, "label": "c" },
{ "$": "auto_text_key", "code": 117, "label": "u" },
{ "$": "auto_text_key", "code": 102, "label": "f" }
]
]
}

View File

@@ -0,0 +1,61 @@
{
"type": "characters",
"name": "neo2",
"label": "Neo2",
"authors": [ "ostrya" ],
"direction": "ltr",
"modifier": "neo2",
"arrangement": [
[
{ "$": "auto_text_key", "code": 120, "label": "x" },
{ "$": "auto_text_key", "code": 118, "label": "v" },
{ "$": "auto_text_key", "code": 108, "label": "l" },
{ "$": "auto_text_key", "code": 99, "label": "c" },
{ "$": "auto_text_key", "code": 119, "label": "w" },
{ "$": "auto_text_key", "code": 107, "label": "k" },
{ "$": "auto_text_key", "code": 104, "label": "h" },
{ "$": "auto_text_key", "code": 103, "label": "g" },
{ "$": "auto_text_key", "code": 102, "label": "f" },
{ "$": "auto_text_key", "code": 113, "label": "q" },
{ "$": "case_selector",
"lower": {
"code": 223, "label": "ß", "popup": {
"relevant": [
{ "code": 180, "label": "´" }
]
}
},
"upper": {
"code": 7838, "label": "ẞ", "popup": {
"relevant": [
{ "code": 180, "label": "´" }
]
}
}
}
],
[
{ "$": "auto_text_key", "code": 117, "label": "u" },
{ "$": "auto_text_key", "code": 105, "label": "i" },
{ "$": "auto_text_key", "code": 97, "label": "a" },
{ "$": "auto_text_key", "code": 101, "label": "e" },
{ "$": "auto_text_key", "code": 111, "label": "o" },
{ "$": "auto_text_key", "code": 115, "label": "s" },
{ "$": "auto_text_key", "code": 110, "label": "n" },
{ "$": "auto_text_key", "code": 114, "label": "r" },
{ "$": "auto_text_key", "code": 116, "label": "t" },
{ "$": "auto_text_key", "code": 100, "label": "d" },
{ "$": "auto_text_key", "code": 121, "label": "y" }
],
[
{ "$": "auto_text_key", "code": 252, "label": "ü" },
{ "$": "auto_text_key", "code": 246, "label": "ö" },
{ "$": "auto_text_key", "code": 228, "label": "ä" },
{ "$": "auto_text_key", "code": 112, "label": "p" },
{ "$": "auto_text_key", "code": 122, "label": "z" },
{ "$": "auto_text_key", "code": 98, "label": "b" },
{ "$": "auto_text_key", "code": 109, "label": "m" },
{ "$": "auto_text_key", "code": 106, "label": "j" }
]
]
}

View File

@@ -0,0 +1,41 @@
{
"type": "characters",
"name": "sangaline",
"label": "Sangaline",
"authors": [ "jeremiah-miller", "sangaline" ],
"direction": "ltr",
"arrangement": [
[
{ "$": "auto_text_key", "code": 100, "label": "d" },
{ "$": "auto_text_key", "code": 103, "label": "g" },
{ "$": "auto_text_key", "code": 104, "label": "h" },
{ "$": "auto_text_key", "code": 112, "label": "p" },
{ "$": "auto_text_key", "code": 97, "label": "a" },
{ "$": "auto_text_key", "code": 115, "label": "s" },
{ "$": "auto_text_key", "code": 106, "label": "j" },
{ "$": "auto_text_key", "code": 114, "label": "r" },
{ "$": "auto_text_key", "code": 107, "label": "k" },
{ "$": "auto_text_key", "code": 110, "label": "n" }
],
[
{ "$": "auto_text_key", "code": 105, "label": "i" },
{ "$": "auto_text_key", "code": 113, "label": "q" },
{ "$": "auto_text_key", "code": 118, "label": "v" },
{ "$": "auto_text_key", "code": 117, "label": "u" },
{ "$": "auto_text_key", "code": 119, "label": "w" },
{ "$": "auto_text_key", "code": 99, "label": "c" },
{ "$": "auto_text_key", "code": 108, "label": "l" },
{ "$": "auto_text_key", "code": 120, "label": "x" },
{ "$": "auto_text_key", "code": 109, "label": "m" }
],
[
{ "$": "auto_text_key", "code": 116, "label": "t" },
{ "$": "auto_text_key", "code": 121, "label": "y" },
{ "$": "auto_text_key", "code": 98, "label": "b" },
{ "$": "auto_text_key", "code": 101, "label": "e" },
{ "$": "auto_text_key", "code": 122, "label": "z" },
{ "$": "auto_text_key", "code": 102, "label": "f" },
{ "$": "auto_text_key", "code": 111, "label": "o" }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "bengali",
"label": "Bengali",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 2535, "label": "১", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 2536, "label": "২", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 2537, "label": "৩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 2538, "label": "৪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 2539, "label": "৫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 2540, "label": "৬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 2541, "label": "৭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 2542, "label": "৮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 2543, "label": "৯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 2534, "label": "০", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "devanagari",
"label": "Devanagari",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 2407, "label": "१", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 2408, "label": "२", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 2409, "label": "३", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 2410, "label": "४", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 2411, "label": "५", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 2412, "label": "६", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 2413, "label": "७", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 2414, "label": "८", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 2415, "label": "९", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 2406, "label": "०", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "gujarati",
"label": "Gujarati",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 2791, "label": "૧", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 2792, "label": "૨", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 2793, "label": "૩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 2794, "label": "૪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 2795, "label": "૫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 2796, "label": "૬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 2797, "label": "૭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 2798, "label": "૮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 2799, "label": "૯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 2790, "label": "૦", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "gurmukhi",
"label": "Gurmukhi",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 2663, "label": "੧", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 2664, "label": "੨", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 2665, "label": "੩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 2666, "label": "੪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 2667, "label": "੫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 2668, "label": "੬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 2669, "label": "੭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 2670, "label": "੮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 2671, "label": "੯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 2662, "label": "੦", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "kannada",
"label": "Kannada",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 3303, "label": "೧", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 3304, "label": "೨", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 3305, "label": "೩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 3306, "label": "೪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 3307, "label": "೫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 3308, "label": "೬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 3309, "label": "೭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 3310, "label": "೮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 3311, "label": "೯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 3302, "label": "೦", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "malayalam",
"label": "Malayalam",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 3431, "label": "൧", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 3432, "label": "൨", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 3433, "label": "൩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 3434, "label": "൪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 3435, "label": "൫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 3436, "label": "൬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 3437, "label": "൭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 3438, "label": "൮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 3439, "label": "൯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 3430, "label": "൦", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,80 @@
{
"type": "numeric_row",
"name": "neo2",
"label": "Neo2",
"authors": [ "ostrya" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 49, "label": "1", "type": "numeric", "popup": {
"relevant": [
{ "code": 176, "label": "°" },
{ "code": 185, "label": "¹" }
]
} },
{ "code": 50, "label": "2", "type": "numeric", "popup": {
"relevant": [
{ "code": 167, "label": "§" },
{ "code": 178, "label": "²" }
]
} },
{ "code": 51, "label": "3", "type": "numeric", "popup": {
"relevant": [
{ "code": 8467, "label": "ℓ" },
{ "code": 179, "label": "³" }
]
} },
{ "code": 52, "label": "4", "type": "numeric", "popup": {
"relevant": [
{ "code": 187, "label": "»" },
{ "code": 8250, "label": "›" }
]
} },
{ "code": 53, "label": "5", "type": "numeric", "popup": {
"relevant": [
{ "code": 171, "label": "«" },
{ "code": 8249, "label": "‹" }
]
} },
{ "code": 54, "label": "6", "type": "numeric", "popup": {
"relevant": [
{ "code": 36, "label": "$" },
{ "code": 162, "label": "¢" }
]
} },
{ "code": 55, "label": "7", "type": "numeric", "popup": {
"main": { "code": -801, "label": "currency_slot_1" },
"relevant": [
{ "code": -802, "label": "currency_slot_2" },
{ "code": -803, "label": "currency_slot_3" },
{ "code": -804, "label": "currency_slot_4" },
{ "code": -805, "label": "currency_slot_5" },
{ "code": -806, "label": "currency_slot_6" }
]
} },
{ "code": 56, "label": "8", "type": "numeric", "popup": {
"relevant": [
{ "code": 8222, "label": "„" },
{ "code": 8218, "label": "‚" }
]
} },
{ "code": 57, "label": "9", "type": "numeric", "popup": {
"relevant": [
{ "code": 8220, "label": "“" },
{ "code": 8216, "label": "‘" }
]
} },
{ "code": 48, "label": "0", "type": "numeric", "popup": {
"relevant": [
{ "code": 8221, "label": "”" },
{ "code": 8217, "label": "’" }
]
} },
{ "code": 45, "label": "-", "type": "numeric", "popup": {
"relevant": [
{ "code": 8212, "label": "—" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "oriya",
"label": "Odia",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 2919, "label": "୧", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 2920, "label": "୨", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 2921, "label": "୩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 2922, "label": "୪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 2923, "label": "୫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 2924, "label": "୬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 2925, "label": "୭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 2926, "label": "୮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 2927, "label": "୯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 2918, "label": "୦", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "tamil",
"label": "Tamil",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 3047, "label": "௧", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 3048, "label": "௨", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 3049, "label": "௩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 3050, "label": "௪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 3051, "label": "௫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 3052, "label": "௬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 3053, "label": "௭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 3054, "label": "௮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 3055, "label": "௯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 3046, "label": "௦", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,91 @@
{
"type": "numeric_row",
"name": "telugu",
"label": "Telugu",
"authors": [ "yashpalgoyal1304" ],
"direction": "ltr",
"arrangement": [
[
{ "code": 3175, "label": "౧", "type": "numeric", "popup": {
"main": { "code": 49, "label": "1" },
"relevant": [
{ "code": 8537, "label": "⅙" },
{ "code": 8528, "label": "⅐" },
{ "code": 8539, "label": "⅛" },
{ "code": 8529, "label": "⅑" },
{ "code": 8530, "label": "⅒" },
{ "code": 185, "label": "¹" },
{ "code": 189, "label": "½" },
{ "code": 8531, "label": "⅓" },
{ "code": 188, "label": "¼" },
{ "code": 8533, "label": "⅕" }
]
} },
{ "code": 3176, "label": "౨", "type": "numeric", "popup": {
"main": { "code": 50, "label": "2" },
"relevant": [
{ "code": 8532, "label": "⅔" },
{ "code": 178, "label": "²" },
{ "code": 8534, "label": "⅖" }
]
} },
{ "code": 3177, "label": "౩", "type": "numeric", "popup": {
"main": { "code": 51, "label": "3" },
"relevant": [
{ "code": 8535, "label": "⅗" },
{ "code": 190, "label": "¾" },
{ "code": 179, "label": "³" },
{ "code": 8540, "label": "⅜" }
]
} },
{ "code": 3178, "label": "౪", "type": "numeric", "popup": {
"main": { "code": 52, "label": "4" },
"relevant": [
{ "code": 8536, "label": "⅘" },
{ "code": 8308, "label": "⁴" }
]
} },
{ "code": 3179, "label": "౫", "type": "numeric", "popup": {
"main": { "code": 53, "label": "5" },
"relevant": [
{ "code": 8538, "label": "⅚" },
{ "code": 8309, "label": "⁵" },
{ "code": 8541, "label": "⅝" }
]
} },
{ "code": 3180, "label": "౬", "type": "numeric", "popup": {
"main": { "code": 54, "label": "6" },
"relevant": [
{ "code": 8310, "label": "⁶" }
]
} },
{ "code": 3181, "label": "౭", "type": "numeric", "popup": {
"main": { "code": 55, "label": "7" },
"relevant": [
{ "code": 8542, "label": "⅞" },
{ "code": 8311, "label": "⁷" }
]
} },
{ "code": 3182, "label": "౮", "type": "numeric", "popup": {
"main": { "code": 56, "label": "8" },
"relevant": [
{ "code": 8312, "label": "⁸" }
]
} },
{ "code": 3183, "label": "౯", "type": "numeric", "popup": {
"main": { "code": 57, "label": "9" },
"relevant": [
{ "code": 8313, "label": "⁹" }
]
} },
{ "code": 3174, "label": "౦", "type": "numeric", "popup": {
"main": { "code": 48, "label": "0" },
"relevant": [
{ "code": 8319, "label": "ⁿ" },
{ "code": 8709, "label": "∅" },
{ "code": 8304, "label": "⁰" }
]
} }
]
]
}

View File

@@ -0,0 +1,22 @@
{
"type": "symbols/mod",
"name": "neo2",
"label": "Neo2",
"authors": [ "ostrya" ],
"direction": "ltr",
"arrangement": [
[
{ "code": -203, "label": "view_symbols2", "type": "system_gui" },
{ "code": 0, "type": "placeholder" },
{ "code": -5, "label": "delete", "type": "enter_editing" }
],
[
{ "code": -201, "label": "view_characters", "type": "system_gui" },
{ "code": -205, "label": "view_numeric_advanced", "type": "system_gui" },
{ "code": 32, "label": "space" },
{ "code": 34, "label": "\"" },
{ "code": 39, "label": "'" },
{ "code": 10, "label": "enter", "groupId": 3, "type": "enter_editing" }
]
]
}

View File

@@ -0,0 +1,46 @@
{
"type": "symbols",
"name": "neo2",
"label": "Neo2",
"authors": [ "ostrya" ],
"direction": "ltr",
"modifier": "neo2",
"arrangement": [
[
{ "code": 8230, "label": "…" },
{ "code": 95, "label": "_" },
{ "code": 91, "label": "[" },
{ "code": 93, "label": "]" },
{ "code": 94, "label": "^" },
{ "code": 33, "label": "!" },
{ "code": 60, "label": "<" },
{ "code": 62, "label": ">" },
{ "code": 61, "label": "=" },
{ "code": 38, "label": "&" },
{ "code": 383, "label": "ſ" }
],
[
{ "code": 92, "label": "\\" },
{ "code": 47, "label": "/" },
{ "code": 123, "label": "{" },
{ "code": 125, "label": "}" },
{ "code": 42, "label": "*" },
{ "code": 63, "label": "?" },
{ "code": 40, "label": "(" },
{ "code": 41, "label": ")" },
{ "code": 45, "label": "-" },
{ "code": 58, "label": ":" },
{ "code": 64, "label": "@" }
],
[
{ "code": 35, "label": "#" },
{ "code": 36, "label": "$" },
{ "code": 124, "label": "|" },
{ "code": 126, "label": "~" },
{ "code": 96, "label": "`" },
{ "code": 43, "label": "+" },
{ "code": 37, "label": "%" },
{ "code": 59, "label": ";" }
]
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,54 @@
# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html
#
# Builds the shared library `florisboard-native` from the two JNI source files
# listed below and links it against prebuilt ICU static archives, Nuspell and
# the project's own sub-libraries.
cmake_minimum_required(VERSION 3.10.2)
project("florisboard")
# Request C++17 for targets created from here on.
set(CMAKE_CXX_STANDARD 17)
# Add this directory to the include search path. include_directories() is
# directory-scoped, so it must stay above the add_subdirectory() calls for the
# sub-projects to inherit it.
include_directories(.)
### ICU4C ###
include_directories(icu4c/include)
#link_directories(${CMAKE_SOURCE_DIR}/../${ANDROID_ABI})
# Per-ABI directory in which the ICU static archives referenced below are looked up.
set(JNI_LIBS ${CMAKE_SOURCE_DIR}/../jniLibs/${ANDROID_ABI})
# The ICU libraries are declared as IMPORTED targets: they are not built by this
# script, only located on disk through IMPORTED_LOCATION.
add_library(ICU::data STATIC IMPORTED)
set_property(TARGET ICU::data PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicudata.a")
# ICU::i18n and ICU::tu are currently disabled (declarations kept for reference).
#add_library(ICU::i18n STATIC IMPORTED)
#set_property(TARGET ICU::i18n PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicui18n.a")
#add_library(ICU::tu STATIC IMPORTED)
#set_property(TARGET ICU::tu PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicutu.a")
add_library(ICU::uc STATIC IMPORTED)
set_property(TARGET ICU::uc PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicuuc.a")
### FlorisBoard ###
# Sub-projects; each directory is expected to provide its own CMakeLists.txt.
add_subdirectory(nuspell)
add_subdirectory(glob_ndk)
add_subdirectory(utils)
add_subdirectory(ime/nlp)
add_subdirectory(ime/spelling)
# The JNI bridge library itself: one translation unit per bound class.
add_library(
florisboard-native
SHARED
dev_patrickgold_florisboard_ime_nlp_SuggestionList.cpp
dev_patrickgold_florisboard_ime_spelling_SpellingDict.cpp
)
# -ffunction-sections/-fdata-sections place each function and data item in its
# own section (presumably so the linker can drop unused ones - confirm the
# matching linker flags elsewhere); -fexceptions enables C++ exception support.
target_compile_options(florisboard-native PRIVATE -ffunction-sections -fdata-sections -fexceptions)
target_link_libraries(
# Destination
florisboard-native
# Sources
android
log
glob_ndk
ICU::uc
ICU::data
Nuspell::nuspell
utils
ime-nlp
ime-spelling
)

View File

@@ -0,0 +1,123 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <jni.h>
#include "ime/nlp/suggestion_list.h"
#pragma ide diagnostic ignored "UnusedLocalVariable"
using namespace ime::nlp;
/**
 * JNI binding for SuggestionList.Companion.nativeInitialize.
 *
 * Heap-allocates a SuggestionList constructed with `max_size` and hands its
 * address back encoded as a jlong handle. Nothing is freed here; the handle is
 * expected to be released through the nativeDispose binding in this file.
 *
 * @param env      JNI environment (unused).
 * @param thiz     Calling Java object (unused).
 * @param max_size Value forwarded to the SuggestionList constructor.
 * @return The address of the new list as a jlong.
 */
extern "C"
JNIEXPORT jlong JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeInitialize(
        JNIEnv *env,
        jobject thiz,
        jint max_size) {
    return reinterpret_cast<jlong>(new SuggestionList(max_size));
}
/**
 * JNI binding for SuggestionList.Companion.nativeDispose.
 *
 * Interprets `native_ptr` as a SuggestionList pointer, clears the list and
 * then deletes it. The handle must not be used after this call.
 *
 * @param env        JNI environment (unused).
 * @param thiz       Calling Java object (unused).
 * @param native_ptr Handle holding the address of a SuggestionList.
 */
extern "C"
JNIEXPORT void JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeDispose(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr) {
    auto *const list = reinterpret_cast<SuggestionList *>(native_ptr);
    list->clear();
    delete list;
}
/**
 * JNI binding for SuggestionList.Companion.nativeAdd.
 *
 * Copies the Java string `word` into a native word_t and forwards it, together
 * with `freq`, to SuggestionList::add on the list behind `native_ptr`.
 *
 * @param env        JNI environment used to read the Java string.
 * @param thiz       Calling Java object (unused).
 * @param native_ptr Handle holding the address of a SuggestionList.
 * @param word       Word to add; a null reference is rejected.
 * @param freq       Frequency value passed through to add().
 * @return The result of SuggestionList::add, or JNI_FALSE if `word` is null or
 *         its characters could not be obtained.
 */
extern "C"
JNIEXPORT jboolean JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeAdd(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr,
        jstring word,
        jint freq) {
    if (word == nullptr) {
        return JNI_FALSE;
    }
    const char *cWord = env->GetStringUTFChars(word, nullptr);
    if (cWord == nullptr) {
        // GetStringUTFChars returns NULL on failure (an exception is then
        // pending in the JVM); building a string from NULL would be undefined.
        return JNI_FALSE;
    }
    word_t stdWord = word_t(cWord);
    // The characters have been copied, so the JNI buffer can be released
    // before the list is touched.
    env->ReleaseStringUTFChars(word, cWord);
    auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
    return suggestionList->add(std::move(stdWord), freq);
}
/**
 * JNI binding for SuggestionList.Companion.nativeClear.
 *
 * Calls clear() on the SuggestionList behind `native_ptr`; the list object
 * itself is not deleted.
 *
 * @param env        JNI environment (unused).
 * @param thiz       Calling Java object (unused).
 * @param native_ptr Handle holding the address of a SuggestionList.
 */
extern "C"
JNIEXPORT void JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeClear(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr) {
    reinterpret_cast<SuggestionList *>(native_ptr)->clear();
}
/**
 * JNI binding for SuggestionList.Companion.nativeContains.
 *
 * Copies the Java string `element` into a native word_t and asks the list
 * behind `native_ptr` whether it contains that word.
 *
 * @param env        JNI environment used to read the Java string.
 * @param thiz       Calling Java object (unused).
 * @param native_ptr Handle holding the address of a SuggestionList.
 * @param element    Word to look up; a null reference is treated as absent.
 * @return The result of SuggestionList::containsWord, or JNI_FALSE if
 *         `element` is null or its characters could not be obtained.
 */
extern "C"
JNIEXPORT jboolean JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeContains(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr,
        jstring element) {
    if (element == nullptr) {
        return JNI_FALSE;
    }
    const char *cWord = env->GetStringUTFChars(element, nullptr);
    if (cWord == nullptr) {
        // GetStringUTFChars returns NULL on failure (an exception is then
        // pending in the JVM); building a string from NULL would be undefined.
        return JNI_FALSE;
    }
    const word_t stdWord = word_t(cWord);
    // The characters have been copied, so the JNI buffer can be released now.
    env->ReleaseStringUTFChars(element, cWord);
    auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
    return suggestionList->containsWord(stdWord);
}
/**
 * JNI binding for SuggestionList.Companion.nativeGetOrNull.
 *
 * Fetches the entry at `index` from the list behind `native_ptr`. When get()
 * yields nullptr, a null Java reference is returned; otherwise the entry's
 * `data` is converted to a new Java string.
 *
 * @param env        JNI environment used to create the Java string.
 * @param thiz       Calling Java object (unused).
 * @param native_ptr Handle holding the address of a SuggestionList.
 * @param index      Position passed to SuggestionList::get.
 * @return A new jstring built from the entry's data, or nullptr.
 */
extern "C"
JNIEXPORT jstring JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeGetOrNull(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr,
        jint index) {
    auto *list = reinterpret_cast<SuggestionList *>(native_ptr);
    auto entry = list->get(index);
    return entry == nullptr ? nullptr : env->NewStringUTF(entry->data.c_str());
}
/**
 * JNI entry point: returns SuggestionList::size() of the list behind
 * `native_ptr`, converted to jint.
 */
extern "C"
JNIEXPORT jint JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeSize(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr) {
    return reinterpret_cast<SuggestionList *>(native_ptr)->size();
}
/**
 * JNI entry point: reads the `isPrimaryTokenAutoInsert` member of the
 * SuggestionList behind `native_ptr`.
 */
extern "C"
JNIEXPORT jboolean JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeGetIsPrimaryTokenAutoInsert(
        JNIEnv *env, jobject thiz, jlong native_ptr) {
    return reinterpret_cast<SuggestionList *>(native_ptr)->isPrimaryTokenAutoInsert;
}
/**
 * JNI entry point: stores `v` into the `isPrimaryTokenAutoInsert` member of
 * the SuggestionList behind `native_ptr`.
 */
extern "C"
JNIEXPORT void JNICALL
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeSetIsPrimaryTokenAutoInsert(
        JNIEnv *env, jobject thiz, jlong native_ptr, jboolean v) {
    reinterpret_cast<SuggestionList *>(native_ptr)->isPrimaryTokenAutoInsert = v;
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <jni.h>
#include <algorithm>
#include "ime/spelling/spellingdict.h"
#include "utils/jni_utils.h"
#pragma ide diagnostic ignored "UnusedLocalVariable"
using namespace ime::spellcheck;
/**
 * JNI entry point: converts `base_path` with utils::j2std_string() and asks
 * SpellingDict::load() for a dictionary at that path.
 *
 * @return the dictionary's address as an opaque jlong handle, or 0 when
 *         SpellingDict::load() returns nullptr.
 */
extern "C"
JNIEXPORT jlong JNICALL
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeInitialize(
        JNIEnv *env,
        jobject thiz,
        jobject base_path) {
    auto *dict = SpellingDict::load(utils::j2std_string(env, base_path));
    if (dict != nullptr) {
        return reinterpret_cast<jlong>(dict);
    }
    return 0L;
}
/**
 * JNI entry point: deletes the SpellingDict behind `native_ptr`.
 * A zero handle is harmless because deleting a null pointer is a no-op.
 */
extern "C"
JNIEXPORT void JNICALL
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeDispose(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr) {
    delete reinterpret_cast<SpellingDict *>(native_ptr);
}
/**
 * JNI entry point: converts `word` with utils::j2std_string() and returns the
 * result of SpellingDict::spell() for it on the dictionary behind `native_ptr`.
 */
extern "C"
JNIEXPORT jboolean JNICALL
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeSpell(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr,
        jobject word) {
    auto nativeWord = utils::j2std_string(env, word);
    auto dict = reinterpret_cast<SpellingDict *>(native_ptr);
    return dict->spell(nativeWord);
}
/**
 * JNI entry point: asks the SpellingDict behind `native_ptr` for suggestions
 * for `word` and returns at most `limit` of them as a Java object array whose
 * element class is java.nio.ByteBuffer.
 *
 * @param limit maximum number of suggestions to return; negative values are
 *              treated as 0.
 * @return the array of converted suggestions, or nullptr when the element
 *         class or the array could not be created (a Java exception is then
 *         pending).
 */
extern "C"
JNIEXPORT jobjectArray JNICALL
Java_dev_patrickgold_florisboard_ime_spelling_SpellingDict_00024Companion_nativeSuggest(
        JNIEnv *env,
        jobject thiz,
        jlong native_ptr,
        jobject word,
        jint limit) {
    auto strWord = utils::j2std_string(env, word);
    auto spellingDict = reinterpret_cast<SpellingDict *>(native_ptr);
    auto result = spellingDict->suggest(strWord);
    // The count is bounded by `limit`, so it always fits into a jsize.
    const auto retSize = static_cast<jsize>(
            std::min(result.size(), static_cast<size_t>(std::max(0, limit))));
    jclass jByteBufferClass = env->FindClass("java/nio/ByteBuffer");
    if (jByteBufferClass == nullptr) {
        return nullptr;
    }
    jobjectArray jSuggestions = env->NewObjectArray(retSize, jByteBufferClass, nullptr);
    if (jSuggestions == nullptr) {
        return nullptr;
    }
    for (jsize n = 0; n < retSize; n++) {
        auto jSuggestion = utils::std2j_string(env, result[n]);
        env->SetObjectArrayElement(jSuggestions, n, jSuggestion);
        // The array now holds its own reference; drop ours so the local
        // reference table does not grow with the number of suggestions.
        // NOTE(review): assumes utils::std2j_string returns a fresh local
        // reference - confirm against utils/jni_utils.h.
        if (jSuggestion != nullptr) {
            env->DeleteLocalRef(jSuggestion);
        }
    }
    return jSuggestions;
}

View File

@@ -0,0 +1,2 @@
# Defines the `glob_ndk` library target from the bundled glob implementation
# (glob_ndk.c) and its header (glob_ndk.h). No library type is given here, so
# CMake's default (BUILD_SHARED_LIBS) decides between static and shared.
add_library(glob_ndk
glob_ndk.c glob_ndk.h)

View File

@@ -0,0 +1,906 @@
/*
* Natanael Arndt, 2011: removed collate.h dependencies
* (my changes are trivial)
*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Guido van Rossum.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* glob(3) -- a superset of the one defined in POSIX 1003.2.
*
* The [!...] convention to negate a range is supported (SysV, Posix, ksh).
*
* Optional extra services, controlled by flags not defined by POSIX:
*
* GLOB_QUOTE:
* Escaping convention: \ inhibits any special meaning the following
* character might have (except \ at end of string is retained).
* GLOB_MAGCHAR:
* Set in gl_flags if pattern contained a globbing character.
* GLOB_NOMAGIC:
* Same as GLOB_NOCHECK, but it will only append pattern if it did
* not contain any magic characters. [Used in csh style globbing]
* GLOB_ALTDIRFUNC:
* Use alternately specified directory access functions.
* GLOB_TILDE:
* expand ~user/foo to the /home/dir/of/user/foo
* GLOB_BRACE:
* expand {1,2}{a,b} to 1a 1b 2a 2b
* gl_matchc:
* Number of matches in the current invocation of glob.
*/
/*
* Some notes on multibyte character support:
* 1. Patterns with illegal byte sequences match nothing - even if
* GLOB_NOCHECK is specified.
* 2. Illegal byte sequences in filenames are handled by treating them as
* single-byte characters with a value of the first byte of the sequence
* cast to wchar_t.
* 3. State-dependent encodings are not currently supported.
*/
#include <sys/param.h>
#include <sys/stat.h>
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include "glob_ndk.h"
#include <limits.h>
#include <pwd.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#define DOLLAR '$'
#define DOT '.'
#define EOS '\0'
#define LBRACKET '['
#define NOT '!'
#define QUESTION '?'
#define QUOTE '\\'
#define RANGE '-'
#define RBRACKET ']'
#define SEP '/'
#define STAR '*'
#define TILDE '~'
#define UNDERSCORE '_'
#define LBRACE '{'
#define RBRACE '}'
#define SLASH '/'
#define COMMA ','
#ifndef DEBUG
#define M_QUOTE 0x8000000000ULL
#define M_PROTECT 0x4000000000ULL
#define M_MASK 0xffffffffffULL
#define M_CHAR 0x00ffffffffULL
typedef uint_fast64_t Char;
#else
#define M_QUOTE 0x80
#define M_PROTECT 0x40
#define M_MASK 0xff
#define M_CHAR 0x7f
typedef char Char;
#endif
#define CHAR(c) ((Char)((c)&M_CHAR))
#define META(c) ((Char)((c)|M_QUOTE))
#define M_ALL META('*')
#define M_END META(']')
#define M_NOT META('!')
#define M_ONE META('?')
#define M_RNG META('-')
#define M_SET META('[')
#define ismeta(c) (((c)&M_QUOTE) != 0)
static int compare(const void *, const void *);
static int g_Ctoc(const Char *, char *, size_t);
static int g_lstat(Char *, struct stat *, glob_t *);
static DIR *g_opendir(Char *, glob_t *);
static const Char *g_strchr(const Char *, wchar_t);
#ifdef notdef
static Char *g_strcat(Char *, const Char *);
#endif
static int g_stat(Char *, struct stat *, glob_t *);
static int glob0(const Char *, glob_t *, size_t *);
static int glob1(Char *, glob_t *, size_t *);
static int glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
static int globextend(const Char *, glob_t *, size_t *);
static const Char *
globtilde(const Char *, Char *, size_t, glob_t *);
static int globexp1(const Char *, glob_t *, size_t *);
static int globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
static int match(Char *, Char *, Char *);
#ifdef DEBUG
static void qprintf(const char *, Char *);
#endif
/*
* Public entry point of the glob implementation.
*
* Converts the multibyte string `pattern` into the internal wide `Char`
* representation (held in a local MAXPATHLEN-sized buffer), initialises the
* bookkeeping fields of `pglob` and then hands the converted pattern to
* globexp1() when GLOB_BRACE is set, or to glob0() otherwise.
*
* pattern: multibyte pattern string.
* flags:   bitwise OR of GLOB_* flags; stored in pglob->gl_flags with
*          GLOB_MAGCHAR cleared.
* errfunc: stored in pglob->gl_errfunc for later use by the directory walker.
* pglob:   result structure; gl_pathc/gl_pathv are reset unless GLOB_APPEND
*          is given, gl_offs is reset unless GLOB_DOOFFS is given as well.
*
* Returns whatever globexp1()/glob0() return, or GLOB_NOMATCH when mbrtowc()
* rejects a byte sequence in the pattern.
*/
int
glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
{
const char *patnext;
size_t limit;
Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
mbstate_t mbs;
wchar_t wc;
size_t clen;
patnext = pattern;
if (!(flags & GLOB_APPEND)) {
pglob->gl_pathc = 0;
pglob->gl_pathv = NULL;
if (!(flags & GLOB_DOOFFS))
pglob->gl_offs = 0;
}
/*
* With GLOB_LIMIT the caller passes the maximum in gl_matchc (0 selects
* ARG_MAX). A limit of 0 means "no limit" for globextend().
*/
if (flags & GLOB_LIMIT) {
limit = pglob->gl_matchc;
if (limit == 0)
limit = ARG_MAX;
} else
limit = 0;
pglob->gl_flags = flags & ~GLOB_MAGCHAR;
pglob->gl_errfunc = errfunc;
pglob->gl_matchc = 0;
bufnext = patbuf;
/* Leave room for the terminating EOS written after the loops. */
bufend = bufnext + MAXPATHLEN - 1;
if (flags & GLOB_NOESCAPE) {
/*
* No escaping: plain multibyte -> wide conversion. The loop stops
* (silently truncating the pattern) once fewer than MB_CUR_MAX slots
* remain in the buffer.
*/
memset(&mbs, 0, sizeof(mbs));
while (bufend - bufnext >= MB_CUR_MAX) {
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
if (clen == (size_t)-1 || clen == (size_t)-2)
return (GLOB_NOMATCH);
else if (clen == 0)
break;
*bufnext++ = wc;
patnext += clen;
}
} else {
/* Protect the quoted characters. */
memset(&mbs, 0, sizeof(mbs));
while (bufend - bufnext >= MB_CUR_MAX) {
if (*patnext == QUOTE) {
/*
* A backslash at the very end of the pattern is kept as a
* protected literal backslash; the next iteration then sees
* the terminator and leaves the loop.
*/
if (*++patnext == EOS) {
*bufnext++ = QUOTE | M_PROTECT;
continue;
}
/* Otherwise the character after the backslash is marked. */
prot = M_PROTECT;
} else
prot = 0;
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
if (clen == (size_t)-1 || clen == (size_t)-2)
return (GLOB_NOMATCH);
else if (clen == 0)
break;
*bufnext++ = wc | prot;
patnext += clen;
}
}
*bufnext = EOS;
if (flags & GLOB_BRACE)
return globexp1(patbuf, pglob, &limit);
else
return glob0(patbuf, pglob, &limit);
}
/*
 * Brace expansion driver. A pattern that consists of exactly "{}" is passed
 * to glob0() untouched (find(1)/csh convention). Otherwise the first opening
 * brace is handed to globexp2(), which performs the expansion and reports the
 * outcome through its `rv` argument. A pattern without any brace goes
 * straight to glob0().
 */
static int
globexp1(const Char *pattern, glob_t *pglob, size_t *limit)
{
    const Char *brace;
    int status;
    int lone_braces;

    lone_braces = pattern[0] == LBRACE && pattern[1] == RBRACE &&
        pattern[2] == EOS;
    if (!lone_braces) {
        for (brace = g_strchr(pattern, LBRACE); brace != NULL;
            brace = g_strchr(brace, LBRACE)) {
            if (globexp2(brace, pattern, pglob, &status, limit) == 0)
                return status;
        }
    }

    return glob0(pattern, pglob, limit);
}
/*
 * Recursive brace globbing helper. Tries to expand a single brace.
 * If it succeeds then it invokes globexp1 with the new pattern.
 * If it fails then it globs the pattern as it is and returns.
 *
 * ptr:     points at the opening brace inside `pattern`.
 * pattern: the complete pattern being expanded.
 * rv:      receives the result code for the caller.
 *
 * Always returns 0; the outcome is delivered through *rv. After a successful
 * expansion *rv is reset to 0, i.e. the results of the individual
 * alternatives are not propagated.
 */
static int
globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit)
{
    int i;
    Char *lm, *ls;
    const Char *pe, *pm, *pm1, *pl;
    Char patbuf[MAXPATHLEN];

    /* copy part up to the brace */
    for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
        continue;
    *lm = EOS;
    ls = lm;

    /* Find the balanced brace */
    for (i = 0, pe = ++ptr; *pe; pe++)
        if (*pe == LBRACKET) {
            /* Ignore everything between [] */
            for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
                continue;
            if (*pe == EOS) {
                /*
                 * We could not find a matching RBRACKET.
                 * Ignore and just look for RBRACE
                 */
                pe = pm;
            }
        }
        else if (*pe == LBRACE)
            i++;
        else if (*pe == RBRACE) {
            if (i == 0)
                break;
            i--;
        }

    /*
     * Non matching braces; just glob the pattern. `pattern` is used here,
     * not `patbuf`: patbuf only holds the text in front of the brace, so
     * globbing it would drop the brace and everything after it.
     */
    if (i != 0 || *pe == EOS) {
        *rv = glob0(pattern, pglob, limit);
        return 0;
    }

    for (i = 0, pl = pm = ptr; pm <= pe; pm++)
        switch (*pm) {
        case LBRACKET:
            /* Ignore everything between [] */
            for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
                continue;
            if (*pm == EOS) {
                /*
                 * We could not find a matching RBRACKET.
                 * Ignore and just look for RBRACE
                 */
                pm = pm1;
            }
            break;

        case LBRACE:
            i++;
            break;

        case RBRACE:
            if (i) {
                i--;
                break;
            }
            /* FALLTHROUGH */
        case COMMA:
            if (i && *pm == COMMA)
                break;
            else {
                /* Append the current string */
                for (lm = ls; (pl < pm); *lm++ = *pl++)
                    continue;
                /*
                 * Append the rest of the pattern after the
                 * closing brace
                 */
                for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
                    continue;

                /* Expand the current pattern */
#ifdef DEBUG
                qprintf("globexp2:", patbuf);
#endif
                *rv = globexp1(patbuf, pglob, limit);

                /* move after the comma, to the next string */
                pl = pm + 1;
            }
            break;

        default:
            break;
        }
    *rv = 0;
    return 0;
}
/*
* expand tilde from the passwd file.
*
* pattern:    wide pattern to inspect.
* patbuf:     caller-supplied buffer that receives the expanded pattern.
* patbuf_len: number of Char elements in patbuf.
*
* Returns `pattern` unchanged when it does not start with '~', when
* GLOB_TILDE is not set, or when no home directory could be determined;
* otherwise returns `patbuf` holding the home directory followed by the rest
* of the pattern.
*/
static const Char *
globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
{
struct passwd *pwd;
char *h;
const Char *p;
Char *b, *eb;
if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
return pattern;
/*
* Copy up to the end of the string or /
*
* Note that patbuf is temporarily used as a plain char array here: each
* Char of the user name is narrowed to one char so the result can be
* passed to getpwnam() below.
*/
eb = &patbuf[patbuf_len - 1];
for (p = pattern + 1, h = (char *) patbuf;
h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
continue;
*h = EOS;
if (((char *) patbuf)[0] == EOS) {
/*
* handle a plain ~ or ~/ by expanding $HOME first (iff
* we're not running setuid or setgid) and then trying
* the password file
*/
if (issetugid() != 0 ||
(h = getenv("HOME")) == NULL) {
if (((h = getlogin()) != NULL &&
(pwd = getpwnam(h)) != NULL) ||
(pwd = getpwuid(getuid())) != NULL)
h = pwd->pw_dir;
else
return pattern;
}
}
else {
/*
* Expand a ~user
*/
if ((pwd = getpwnam((char*) patbuf)) == NULL)
return pattern;
else
h = pwd->pw_dir;
}
/* Copy the home directory */
for (b = patbuf; b < eb && *h; *b++ = *h++)
continue;
/* Append the rest of the pattern */
while (b < eb && (*b++ = *p++) != EOS)
continue;
/* Terminate explicitly; the copy above may have stopped at the buffer end. */
*b = EOS;
return patbuf;
}
/*
* The main glob() routine: compiles the pattern (optionally processing
* quotes), calls glob1() to do the real pattern matching, and finally
* sorts the list (unless unsorted operation is requested). Returns 0
* if things went well, nonzero if errors occurred.
*
* "Compiling" means replacing the globbing characters '*', '?' and '[...]'
* by their M_* meta codes and stripping the protection bits from all other
* characters via CHAR(). GLOB_MAGCHAR is set in pglob->gl_flags as soon as
* one meta character has been emitted.
*/
static int
glob0(const Char *pattern, glob_t *pglob, size_t *limit)
{
const Char *qpatnext;
int err;
size_t oldpathc;
Char *bufnext, c, patbuf[MAXPATHLEN];
/* Tilde expansion writes into patbuf and may return it as the source. */
qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
/* Remember the count so that only paths added by this call are sorted. */
oldpathc = pglob->gl_pathc;
bufnext = patbuf;
/* We don't need to check for buffer overflow any more. */
while ((c = *qpatnext++) != EOS) {
switch (c) {
case LBRACKET:
c = *qpatnext;
if (c == NOT)
++qpatnext;
/* No closing bracket: treat '[' as an ordinary character. */
if (*qpatnext == EOS ||
g_strchr(qpatnext+1, RBRACKET) == NULL) {
*bufnext++ = LBRACKET;
if (c == NOT)
--qpatnext;
break;
}
*bufnext++ = M_SET;
if (c == NOT)
*bufnext++ = M_NOT;
c = *qpatnext++;
/* Emit set members; "a-b" becomes  a M_RNG b. */
do {
*bufnext++ = CHAR(c);
if (*qpatnext == RANGE &&
(c = qpatnext[1]) != RBRACKET) {
*bufnext++ = M_RNG;
*bufnext++ = CHAR(c);
qpatnext += 2;
}
} while ((c = *qpatnext++) != RBRACKET);
pglob->gl_flags |= GLOB_MAGCHAR;
*bufnext++ = M_END;
break;
case QUESTION:
pglob->gl_flags |= GLOB_MAGCHAR;
*bufnext++ = M_ONE;
break;
case STAR:
pglob->gl_flags |= GLOB_MAGCHAR;
/* collapse adjacent stars to one,
* to avoid exponential behavior
*/
if (bufnext == patbuf || bufnext[-1] != M_ALL)
*bufnext++ = M_ALL;
break;
default:
*bufnext++ = CHAR(c);
break;
}
}
*bufnext = EOS;
#ifdef DEBUG
qprintf("glob0:", patbuf);
#endif
if ((err = glob1(patbuf, pglob, limit)) != 0)
return(err);
/*
* If there was no match we are going to append the pattern
* if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
* and the pattern did not contain any magic characters
* GLOB_NOMAGIC is there just for compatibility with csh.
*/
if (pglob->gl_pathc == oldpathc) {
if (((pglob->gl_flags & GLOB_NOCHECK) ||
((pglob->gl_flags & GLOB_NOMAGIC) &&
!(pglob->gl_flags & GLOB_MAGCHAR))))
return(globextend(pattern, pglob, limit));
else
return(GLOB_NOMATCH);
}
if (!(pglob->gl_flags & GLOB_NOSORT))
qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
pglob->gl_pathc - oldpathc, sizeof(char *), compare);
return(0);
}
/*
 * qsort(3) comparator for the result vector: both arguments point at
 * `char *` elements, which are ordered with strcmp(3).
 */
static int
compare(const void *p, const void *q)
{
    const char *lhs = *(char **)p;
    const char *rhs = *(char **)q;

    return (strcmp(lhs, rhs));
}
/*
 * Starts the directory walk for a compiled pattern: provides the path buffer
 * that glob2()/glob3() build candidate names in. An empty pattern matches
 * nothing and yields 0.
 */
static int
glob1(Char *pattern, glob_t *pglob, size_t *limit)
{
    Char pathbuf[MAXPATHLEN];
    Char *const pathbuf_last = pathbuf + MAXPATHLEN - 1;

    /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
    if (*pattern != EOS)
        return (glob2(pathbuf, pathbuf, pathbuf_last, pattern, pglob, limit));

    return (0);
}
/*
* The functions glob2 and glob3 are mutually recursive; there is one level
* of recursion for each segment in the pattern that contains one or more
* meta characters.
*
* pathbuf:      start of the path assembled so far.
* pathend:      position in pathbuf where the next segment is appended.
* pathend_last: last usable element of pathbuf.
* pattern:      remaining compiled pattern.
*
* Returns 0 on success (including "path does not exist"), GLOB_ABORTED when
* the path buffer would overflow, or the result of globextend()/glob3().
*/
static int
glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
glob_t *pglob, size_t *limit)
{
struct stat sb;
Char *p, *q;
int anymeta;
/*
* Loop over pattern segments until end of pattern or until
* segment with meta character found.
*/
for (anymeta = 0;;) {
if (*pattern == EOS) { /* End of pattern? */
*pathend = EOS;
/* A path that cannot be lstat'ed is skipped, not reported. */
if (g_lstat(pathbuf, &sb, pglob))
return(0);
/*
* GLOB_MARK: append '/' to directories, and to symlinks whose
* target is a directory, unless the path already ends in '/'.
*/
if (((pglob->gl_flags & GLOB_MARK) &&
pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
|| (S_ISLNK(sb.st_mode) &&
(g_stat(pathbuf, &sb, pglob) == 0) &&
S_ISDIR(sb.st_mode)))) {
if (pathend + 1 > pathend_last)
return (GLOB_ABORTED);
*pathend++ = SEP;
*pathend = EOS;
}
++pglob->gl_matchc;
return(globextend(pathbuf, pglob, limit));
}
/* Find end of next segment, copy tentatively to pathend. */
q = pathend;
p = pattern;
while (*p != EOS && *p != SEP) {
if (ismeta(*p))
anymeta = 1;
if (q + 1 > pathend_last)
return (GLOB_ABORTED);
*q++ = *p++;
}
if (!anymeta) { /* No expansion, do next segment. */
pathend = q;
pattern = p;
/* Copy the separator(s) between segments verbatim. */
while (*pattern == SEP) {
if (pathend + 1 > pathend_last)
return (GLOB_ABORTED);
*pathend++ = *pattern++;
}
} else /* Need expansion, recurse. */
return(glob3(pathbuf, pathend, pathend_last, pattern, p,
pglob, limit));
}
/* NOTREACHED */
}
/*
 * Expands one pattern segment that contains meta characters: opens the
 * directory named by pathbuf (up to pathend), matches every entry against
 * [pattern, restpattern) and recurses into glob2() with `restpattern` for each
 * entry that matches.
 *
 * Returns 0 on success or when the directory cannot be opened and the error
 * is not to be reported, GLOB_ABORTED on buffer overflow or when the error
 * callback / GLOB_ERR requests it, otherwise the first non-zero result of
 * glob2().
 */
static int
glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
    Char *pattern, Char *restpattern,
    glob_t *pglob, size_t *limit)
{
    struct dirent *dp;
    DIR *dirp;
    int err;
    char buf[MAXPATHLEN];

    /*
     * The readdirfunc declaration can't be prototyped, because it is
     * assigned, below, to two functions which are prototyped in glob.h
     * and dirent.h as taking pointers to differently typed opaque
     * structures.
     */
    struct dirent *(*readdirfunc)();

    if (pathend > pathend_last)
        return (GLOB_ABORTED);
    *pathend = EOS;
    errno = 0;

    if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
        /* TODO: don't call for ENOENT or ENOTDIR? */
        if (pglob->gl_errfunc) {
            if (g_Ctoc(pathbuf, buf, sizeof(buf)))
                return (GLOB_ABORTED);
            if (pglob->gl_errfunc(buf, errno) ||
                pglob->gl_flags & GLOB_ERR)
                return (GLOB_ABORTED);
        }
        return(0);
    }

    err = 0;

    /* Search directory for matching names. */
    if (pglob->gl_flags & GLOB_ALTDIRFUNC)
        readdirfunc = pglob->gl_readdir;
    else
        readdirfunc = readdir;
    while ((dp = (*readdirfunc)(dirp))) {
        char *sc;
        Char *dc;
        wchar_t wc;
        size_t clen;
        mbstate_t mbs;
        int too_long;

        /* Initial DOT must be matched literally. */
        if (dp->d_name[0] == DOT && *pattern != DOT)
            continue;
        memset(&mbs, 0, sizeof(mbs));
        dc = pathend;
        sc = dp->d_name;
        /*
         * Assume the name does not fit until its terminator has actually
         * been stored; a name cut off at pathend_last is unterminated and
         * must not be handed to match() or glob2().
         */
        too_long = 1;
        while (dc < pathend_last) {
            clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
            if (clen == (size_t)-1 || clen == (size_t)-2) {
                /*
                 * Invalid sequence: use the byte value itself. Going
                 * through unsigned char avoids sign extension where
                 * plain char is signed.
                 */
                wc = (unsigned char)*sc;
                clen = 1;
                memset(&mbs, 0, sizeof(mbs));
            }
            if ((*dc++ = wc) == EOS) {
                too_long = 0;
                break;
            }
            sc += clen;
        }

        if (too_long || !match(pathend, pattern, restpattern)) {
            *pathend = EOS;
            continue;
        }
        /* --dc steps back onto the terminator just written. */
        err = glob2(pathbuf, --dc, pathend_last, restpattern,
            pglob, limit);
        if (err)
            break;
    }

    if (pglob->gl_flags & GLOB_ALTDIRFUNC)
        (*pglob->gl_closedir)(dirp);
    else
        closedir(dirp);
    return(err);
}
/*
 * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
 * add the new item, and update gl_pathc.
 *
 * This assumes the BSD realloc, which only copies the block when its size
 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
 * behavior.
 *
 * Return 0 if new item added, error code if memory couldn't be allocated.
 * On failure the paths collected so far stay in gl_pathv (NULL terminated),
 * so that the caller can still release them with globfree().
 *
 * Invariant of the glob_t structure:
 *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
 *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
 */
static int
globextend(const Char *path, glob_t *pglob, size_t *limit)
{
    char **pathv;
    size_t i, newsize, len;
    char *copy;
    const Char *p;

    /* A limit of 0 means "unlimited". */
    if (*limit && pglob->gl_pathc > *limit) {
        errno = 0;
        return (GLOB_NOSPACE);
    }

    newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
    pathv = pglob->gl_pathv ?
        realloc((char *)pglob->gl_pathv, newsize) :
        malloc(newsize);
    if (pathv == NULL) {
        /*
         * A failed realloc leaves the old vector valid. Keep it instead of
         * freeing it: freeing only the vector would leak every path string
         * stored in it.
         */
        return (GLOB_NOSPACE);
    }

    if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
        /* first time around -- clear initial gl_offs items */
        pathv += pglob->gl_offs;
        for (i = pglob->gl_offs + 1; --i > 0; )
            *--pathv = NULL;
    }
    pglob->gl_pathv = pathv;
    /* Keep the vector terminated even if one of the steps below fails. */
    pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;

    for (p = path; *p++;)
        continue;
    len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
    if ((copy = malloc(len)) == NULL)
        return (GLOB_NOSPACE);
    if (g_Ctoc(path, copy, len)) {
        free(copy);
        return (GLOB_NOSPACE);
    }
    pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
    pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
    return (0);
}
/*
* pattern matching function for filenames. Each occurrence of the *
* pattern causes a recursion level.
*
* name:   EOS-terminated candidate file name.
* pat:    start of the compiled pattern segment.
* patend: one past the last element of the pattern segment.
*
* Returns 1 when the whole name matches [pat, patend), 0 otherwise.
*/
static int
match(Char *name, Char *pat, Char *patend)
{
int ok, negate_range;
Char c, k;
while (pat < patend) {
c = *pat++;
switch (c & M_MASK) {
case M_ALL:
/* A trailing '*' matches whatever is left of the name. */
if (pat == patend)
return(1);
/* Otherwise try the rest of the pattern at every suffix of name. */
do
if (match(name, pat, patend))
return(1);
while (*name++ != EOS);
return(0);
case M_ONE:
/* '?' consumes exactly one character; it cannot match the end. */
if (*name++ == EOS)
return(0);
break;
case M_SET:
ok = 0;
if ((k = *name++) == EOS)
return(0);
/* negate_range is 1 for a "[!...]" set; the comparison with EOS (0)
* merely tests that flag. */
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
++pat;
/* Walk the set members up to M_END; "a M_RNG b" denotes a range. */
while (((c = *pat++) & M_MASK) != M_END)
if ((*pat & M_MASK) == M_RNG) {
if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])) ok = 1;
pat += 2;
} else if (c == k)
ok = 1;
/* Fail when membership and negation agree. */
if (ok == negate_range)
return(0);
break;
default:
/* Literal character. */
if (*name++ != c)
return(0);
break;
}
}
/* The pattern is exhausted; the name must be exhausted as well. */
return(*name == EOS);
}
/*
 * Releases everything a glob_t owns: each of the gl_pathc path strings that
 * follow the gl_offs reserved slots, then the vector itself. gl_pathv is reset
 * to NULL afterwards. Does nothing when gl_pathv is already NULL.
 */
void
globfree(glob_t *pglob)
{
    char **entry;
    size_t remaining;

    if (pglob->gl_pathv == NULL)
        return;

    entry = pglob->gl_pathv + pglob->gl_offs;
    for (remaining = pglob->gl_pathc; remaining > 0; --remaining, ++entry) {
        if (*entry != NULL)
            free(*entry);
    }
    free(pglob->gl_pathv);
    pglob->gl_pathv = NULL;
}
/*
 * Opens the directory named by the wide string `str` ("." when `str` is
 * empty), using pglob->gl_opendir when GLOB_ALTDIRFUNC is set and opendir(3)
 * otherwise. Returns NULL when the name cannot be converted by g_Ctoc().
 */
static DIR *
g_opendir(Char *str, glob_t *pglob)
{
    char dirname[MAXPATHLEN];

    if (*str) {
        if (g_Ctoc(str, dirname, sizeof(dirname)))
            return (NULL);
    } else {
        strcpy(dirname, ".");
    }

    if (!(pglob->gl_flags & GLOB_ALTDIRFUNC))
        return (opendir(dirname));
    return ((*pglob->gl_opendir)(dirname));
}
/*
 * lstat(2) for a wide path: converts `fn` with g_Ctoc() and calls
 * pglob->gl_lstat when GLOB_ALTDIRFUNC is set, lstat(2) otherwise.
 * Returns -1 with errno set to ENAMETOOLONG when the conversion fails.
 */
static int
g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
{
    char path[MAXPATHLEN];

    if (g_Ctoc(fn, path, sizeof(path)) != 0) {
        errno = ENAMETOOLONG;
        return (-1);
    }
    if (!(pglob->gl_flags & GLOB_ALTDIRFUNC))
        return (lstat(path, sb));
    return ((*pglob->gl_lstat)(path, sb));
}
/*
 * stat(2) for a wide path: converts `fn` with g_Ctoc() and calls
 * pglob->gl_stat when GLOB_ALTDIRFUNC is set, stat(2) otherwise.
 * Returns -1 with errno set to ENAMETOOLONG when the conversion fails.
 */
static int
g_stat(Char *fn, struct stat *sb, glob_t *pglob)
{
    char path[MAXPATHLEN];

    if (g_Ctoc(fn, path, sizeof(path)) != 0) {
        errno = ENAMETOOLONG;
        return (-1);
    }
    if (!(pglob->gl_flags & GLOB_ALTDIRFUNC))
        return (stat(path, sb));
    return ((*pglob->gl_stat)(path, sb));
}
/*
 * strchr(3) for Char strings: returns a pointer to the first element equal to
 * `ch` (the terminator itself is examined too), or NULL when the terminator
 * is reached without a match.
 */
static const Char *
g_strchr(const Char *str, wchar_t ch)
{
    const Char *cur;

    for (cur = str; ; ++cur) {
        if (*cur == ch)
            return (cur);
        if (!*cur)
            return (NULL);
    }
}
/*
 * Converts the wide string `str` (terminator included) into the multibyte
 * buffer `buf` of `len` bytes using wcrtomb(3).
 *
 * Returns 0 once the terminator has been converted, 1 when a character
 * cannot be converted or fewer than MB_CUR_MAX bytes remain in `buf`.
 */
static int
g_Ctoc(const Char *str, char *buf, size_t len)
{
    mbstate_t state;
    size_t written;

    memset(&state, 0, sizeof(state));
    for (; len >= MB_CUR_MAX; str++, buf += written, len -= written) {
        written = wcrtomb(buf, *str, &state);
        if (written == (size_t)-1)
            return (1);
        if (*str == L'\0')
            return (0);
    }
    return (1);
}
#ifdef DEBUG
/*
 * Debug helper: prints the label `str`, then three rows for the compiled
 * pattern `s` - the plain characters, a '"' under every protected character
 * and a '_' under every meta character.
 */
static void
qprintf(const char *str, Char *s)
{
    Char *cur;

    (void)printf("%s:\n", str);

    cur = s;
    while (*cur) {
        (void)printf("%c", CHAR(*cur));
        cur++;
    }
    (void)printf("\n");

    cur = s;
    while (*cur) {
        (void)printf("%c", *cur & M_PROTECT ? '"' : ' ');
        cur++;
    }
    (void)printf("\n");

    cur = s;
    while (*cur) {
        (void)printf("%c", ismeta(*cur) ? '_' : ' ');
        cur++;
    }
    (void)printf("\n");
}
#endif

View File

@@ -0,0 +1,98 @@
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Guido van Rossum.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)glob.h 8.1 (Berkeley) 6/2/93
* $FreeBSD$
*/
#ifndef _GLOB_H_
#define _GLOB_H_
#include <sys/cdefs.h>
#include <sys/types.h>
#include <glob.h>
/*#ifndef _SIZE_T_DECLARED
typedef __size_t size_t;
#define _SIZE_T_DECLARED
#endif*/
struct stat;
/*
* Result and state structure shared between the caller and glob()/globfree().
* The caller passes it to glob(), reads gl_pathc/gl_pathv afterwards and
* releases the results with globfree().
*/
typedef struct {
size_t gl_pathc; /* Count of total paths so far. */
/*
* Count of paths matching pattern. glob() also reads this field on entry
* as the maximum number of paths when GLOB_LIMIT is set.
*/
size_t gl_matchc; /* Count of paths matching pattern. */
size_t gl_offs; /* Reserved at beginning of gl_pathv. */
int gl_flags; /* Copy of flags parameter to glob. */
char **gl_pathv; /* List of paths matching pattern. */
/* Copy of errfunc parameter to glob. */
int (*gl_errfunc)(const char *, int);
/*
* Alternate filesystem access methods for glob; replacement
* versions of closedir(3), readdir(3), opendir(3), stat(2)
* and lstat(2).
* They are only called when GLOB_ALTDIRFUNC is set in gl_flags.
*/
void (*gl_closedir)(void *);
struct dirent *(*gl_readdir)(void *);
void *(*gl_opendir)(const char *);
int (*gl_lstat)(const char *, struct stat *);
int (*gl_stat)(const char *, struct stat *);
} glob_t;
/* Believed to have been introduced in 1003.2-1992 */
#define GLOB_APPEND 0x0001 /* Append to output from previous call. */
#define GLOB_DOOFFS 0x0002 /* Prepend `gl_offs` null pointers (leaving space for exec, say). */
#define GLOB_ERR 0x0004 /* Return on error. */
#define GLOB_MARK 0x0008 /* Append "/" to the names of returned directories. */
#define GLOB_NOCHECK 0x0010 /* Return pattern itself if nothing matches. */
#define GLOB_NOSORT 0x0020 /* Don't sort. */
#define GLOB_NOESCAPE 0x2000 /* Disable backslash escaping. */
/* Error values returned by glob(3) */
#define GLOB_NOSPACE (-1) /* Malloc call failed. */
#define GLOB_ABORTED (-2) /* Unignored error. */
#define GLOB_NOMATCH (-3) /* No match and GLOB_NOCHECK was not set. */
#define GLOB_ALTDIRFUNC 0x0040 /* Use alternately specified directory funcs. */
#define GLOB_BRACE 0x0080 /* Expand braces like csh. */
#define GLOB_MAGCHAR 0x0100 /* Set in `gl_flags` if the pattern had globbing characters. */
#define GLOB_NOMAGIC 0x0200 /* GLOB_NOCHECK without magic chars (csh). */
#define GLOB_QUOTE 0x0400 /* Quote special chars with \. */
#define GLOB_TILDE 0x0800 /* Expand tilde names from the passwd file. */
#define GLOB_LIMIT 0x1000 /* limit number of returned paths */
__BEGIN_DECLS
int glob(const char *, int, int (*)(const char *, int), glob_t *);
void globfree(glob_t *);
__END_DECLS
#endif /* !_GLOB_H_ */

View File

@@ -0,0 +1,55 @@
{
"strategy": "subtractive",
"featureFilters": {
"coll_ucadata": {
"filterType": "exclude"
},
"coll_tree": {
"filterType": "exclude"
},
"confusables": {
"filterType": "exclude"
},
"curr_supplemental": {
"filterType": "exclude"
},
"curr_tree": {
"filterType": "exclude"
},
"locales_tree": {
"excludelist": [
"en_US_POSIX"
]
},
"misc": {
"excludelist": [
"currencyNumericCodes",
"genderList"
]
},
"region_tree": {
"filterType": "exclude"
},
"rbnf_tree": {
"filterType": "exclude"
},
"stringprep": {
"filterType": "exclude"
},
"translit": {
"filterType": "exclude"
},
"unames": {
"filterType": "exclude"
},
"unit_tree": {
"filterType": "exclude"
},
"zone_supplemental": {
"filterType": "exclude"
},
"zone_tree": {
"filterType": "exclude"
}
}
}

View File

@@ -0,0 +1,106 @@
#!/bin/bash
# Copyright 2021 Patrick Goldinger
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build script for ICU4C, tailored exactly for FlorisBoard's needs.
# Before executing this script to manually rebuild the ICU libraries, make sure to execute
# git submodule update --init --recursive
# else the script won't find the ICU source files!
###### Build ICU4C ######
# Runs the cc-icu4c.sh wrapper to build static ICU libraries for the arm and
# arm64 architectures at API level 23. Headers are installed into ./include and
# libraries into ../../jniLibs. The ICU data is trimmed according to
# data-feature-filter.json; collation, formatting, regex and transliteration
# are switched off, legacy converters stay enabled.
./android/cc-icu4c.sh build \
--arch=arm,arm64 \
--api=23 \
--library-type=static \
--build-dir=./build \
--icu-src-dir=./android/icu/icu4c \
--install-include-dir=./include \
--install-libs-dir=./../../jniLibs \
--data-filter-file=./data-feature-filter.json \
--enable-collation=no \
--enable-formatting=no \
--enable-legacy-converters=yes \
--enable-regex=no \
--enable-transliteration=no
###### Clean up unused header files in include/unicode header dir ######
# Separator used to join header names into the single-string header list.
readonly SEP=":"
# Resolve the directories first and mark them readonly afterwards, so that a
# failing realpath is not hidden behind the exit status of `readonly`.
NUSPELL_DIR=$(realpath ../nuspell) || exit 1
UNICODE_DIR=$(realpath include/unicode) || exit 1
readonly NUSPELL_DIR UNICODE_DIR
# The clean-up below deletes files from UNICODE_DIR; refuse to continue when
# either directory is missing (e.g. because the build above failed).
if [ ! -d "$NUSPELL_DIR" ] || [ ! -d "$UNICODE_DIR" ]; then
    echo "Missing directory: '$NUSPELL_DIR' or '$UNICODE_DIR'" >&2
    exit 1
fi
# scan_file FILE VARNAME
#
# Scans FILE line by line for `#include <unicode/X>` and
# `#include "unicode/X"` directives and appends every X that is not yet listed
# to the variable named VARNAME (passed by name), each one prefixed with $SEP.
#
# Entries are compared as whole names: a header is only skipped when exactly
# that name is already in the list, not when it merely occurs inside a longer
# name (e.g. "rep.h" inside "usprep.h").
scan_file() {
    local file=$1
    local -n var=$2
    local line header
    #echo "Scanning '$file'..."
    # `|| [ -n "$line" ]` also processes a last line without trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        case $line in
            *"#include <unicode/"*)
                # [^>]* stops at the first '>', so trailing text on the line
                # cannot end up in the header name.
                # shellcheck disable=SC2001
                header=$(sed -e 's/.*<unicode\/\([^>]*\)>.*/\1/' <<< "$line")
                ;;
            *"#include \"unicode/"*)
                # shellcheck disable=SC2001
                header=$(sed -e 's/.*"unicode\/\([^"]*\)".*/\1/' <<< "$line")
                ;;
            *)
                header=""
                ;;
        esac
        if [ -z "$header" ]; then
            continue
        fi
        # Wrap both sides in separators so that only complete entries match.
        if [[ "$SEP$var$SEP" != *"$SEP$header$SEP"* ]]; then
            var+="$SEP$header"
        fi
    done < "$file"
}
# Collect the ICU headers that the nuspell sources include directly.
req_headers=""
for nsrcfile in "$NUSPELL_DIR"/*; do
    # Only regular files can be read line by line; skip sub-directories.
    [ -f "$nsrcfile" ] || continue
    scan_file "$nsrcfile" "req_headers"
done
# scan_file prefixes every entry with the separator; drop the leading one.
if [ -n "$req_headers" ]; then
    req_headers=${req_headers:1}
fi
# Add the headers included by the required headers, and repeat until a pass
# no longer changes the list.
while true; do
    old_req_headers=$req_headers
    IFS="$SEP" read -ra req_header_splitted <<< "$req_headers"
    for req_header in "${req_header_splitted[@]}"; do
        # A listed header may not exist in the installed include dir.
        [ -f "$UNICODE_DIR/$req_header" ] || continue
        scan_file "$UNICODE_DIR/$req_header" "req_headers"
    done
    [ "$req_headers" != "$old_req_headers" ] || break
done
#echo "$req_headers"
# Delete every installed header that is not referenced by the list. The check
# is a substring match, so it can only keep too many headers, never too few.
for headerfile in "$UNICODE_DIR"/*; do
    header=$(basename "$headerfile")
    if [[ "$req_headers" == *"$header"* ]]; then
        echo "KEEP '$headerfile'"
    else
        echo "DELETE '$headerfile'"
        rm "$headerfile"
    fi
done

View File

@@ -0,0 +1,670 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************************
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
* File brkiter.h
*
* Modification History:
*
* Date Name Description
* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
* 05/07/97 aliu Fixed DLL declaration.
* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
* 08/11/98 helena Sync-up JDK1.2.
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
********************************************************************************
*/
#ifndef BRKITER_H
#define BRKITER_H
#include "unicode/utypes.h"
/**
* \file
* \brief C++ API: Break Iterator.
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#if UCONFIG_NO_BREAK_ITERATION
U_NAMESPACE_BEGIN
/*
* Allow the declaration of APIs with pointers to BreakIterator
* even when break iteration is removed from the build.
*/
class BreakIterator;
U_NAMESPACE_END
#else
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/locid.h"
#include "unicode/ubrk.h"
#include "unicode/strenum.h"
#include "unicode/utext.h"
#include "unicode/umisc.h"
U_NAMESPACE_BEGIN
/**
* The BreakIterator class implements methods for finding the location
* of boundaries in text. BreakIterator is an abstract base class.
* Instances of BreakIterator maintain a current position and scan over
* text returning the index of characters where boundaries occur.
* <p>
* Line boundary analysis determines where a text string can be broken
* when line-wrapping. The mechanism correctly handles punctuation and
* hyphenated words.
* <p>
* Sentence boundary analysis allows selection with correct
* interpretation of periods within numbers and abbreviations, and
* trailing punctuation marks such as quotation marks and parentheses.
* <p>
* Word boundary analysis is used by search and replace functions, as
* well as within text editing applications that allow the user to
* select words with a double click. Word selection provides correct
* interpretation of punctuation marks within and following
* words. Characters that are not part of a word, such as symbols or
* punctuation marks, have word-breaks on both sides.
* <p>
* Character boundary analysis allows users to interact with
* characters as they expect to, for example, when moving the cursor
* through a text string. Character boundary analysis provides correct
* navigation through character strings, regardless of how the
* character is stored. For example, an accented character might be
* stored as a base character and a diacritical mark. What users
* consider to be a character can differ between languages.
* <p>
* The text boundary positions are found according to the rules
* described in Unicode Standard Annex #29, Text Boundaries, and
* Unicode Standard Annex #14, Line Breaking Properties. These
* are available at http://www.unicode.org/reports/tr14/ and
* http://www.unicode.org/reports/tr29/.
* <p>
* In addition to the C++ API defined in this header file, a
* plain C API with equivalent functionality is defined in the
* file ubrk.h
* <p>
* Code snippets illustrating the use of the Break Iterator APIs
* are available in the ICU User Guide,
* http://icu-project.org/userguide/boundaryAnalysis.html
* and in the sample program icu/source/samples/break/break.cpp
*
*/
class U_COMMON_API BreakIterator : public UObject {
public:
/**
* destructor
* @stable ICU 2.0
*/
virtual ~BreakIterator();
/**
* Return true if another object is semantically equal to this
* one. The other object should be an instance of the same subclass of
* BreakIterator. Objects of different subclasses are considered
* unequal.
* <P>
* Return true if this BreakIterator is at the same position in the
* same text, and is the same class and type (word, line, etc.) of
* BreakIterator, as the argument. Text is considered the same if
* it contains the same characters, it need not be the same
* object, and styles are not considered.
* @stable ICU 2.0
*/
virtual UBool operator==(const BreakIterator&) const = 0;
/**
* Returns the complement of the result of operator==
* @param rhs The BreakIterator to be compared for inequality
* @return the complement of the result of operator==
* @stable ICU 2.0
*/
UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
/**
* Return a polymorphic copy of this object. This is an abstract
* method which subclasses implement.
* @stable ICU 2.0
*/
virtual BreakIterator* clone() const = 0;
/**
* Return a polymorphic class ID for this object. Different subclasses
* will return distinct unequal values.
* @stable ICU 2.0
*/
virtual UClassID getDynamicClassID(void) const = 0;
/**
* Return a CharacterIterator over the text being analyzed.
* @stable ICU 2.0
*/
virtual CharacterIterator& getText(void) const = 0;
/**
* Get a UText for the text being analyzed.
* The returned UText is a shallow clone of the UText used internally
* by the break iterator implementation. It can safely be used to
* access the text without impacting any break iterator operations,
* but the underlying text itself must not be altered.
*
* @param fillIn A UText to be filled in. If NULL, a new UText will be
* allocated to hold the result.
* @param status receives any error codes.
* @return The current UText for this break iterator. If an input
* UText was provided, it will always be returned.
* @stable ICU 3.4
*/
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
/**
* Change the text over which this operates. The text boundary is
* reset to the start.
*
* The BreakIterator will retain a reference to the supplied string.
* The caller must not modify or delete the text while the BreakIterator
* retains the reference.
*
* @param text The UnicodeString used to change the text.
* @stable ICU 2.0
*/
virtual void setText(const UnicodeString &text) = 0;
/**
* Reset the break iterator to operate over the text represented by
* the UText. The iterator position is reset to the start.
*
* This function makes a shallow clone of the supplied UText. This means
* that the caller is free to immediately close or otherwise reuse the
* Utext that was passed as a parameter, but that the underlying text itself
* must not be altered while being referenced by the break iterator.
*
* All index positions returned by break iterator functions are
* native indices from the UText. For example, when breaking UTF-8
* encoded text, the break positions returned by next(), previous(), etc.
* will be UTF-8 string indices, not UTF-16 positions.
*
* @param text The UText used to change the text.
* @param status receives any error codes.
* @stable ICU 3.4
*/
virtual void setText(UText *text, UErrorCode &status) = 0;
/**
* Change the text over which this operates. The text boundary is
* reset to the start.
* Note that setText(UText *) provides similar functionality to this function,
* and is more efficient.
* @param it The CharacterIterator used to change the text.
* @stable ICU 2.0
*/
virtual void adoptText(CharacterIterator* it) = 0;
enum {
/**
* DONE is returned by previous() and next() after all valid
* boundaries have been returned.
* @stable ICU 2.0
*/
DONE = (int32_t)-1
};
/**
* Sets the current iteration position to the beginning of the text, position zero.
* @return The offset of the beginning of the text, zero.
* @stable ICU 2.0
*/
virtual int32_t first(void) = 0;
/**
* Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
* @return The index immediately BEYOND the last character in the text being scanned.
* @stable ICU 2.0
*/
virtual int32_t last(void) = 0;
/**
* Set the iterator position to the boundary preceding the current boundary.
* @return The character index of the previous text boundary or DONE if all
* boundaries have been returned.
* @stable ICU 2.0
*/
virtual int32_t previous(void) = 0;
/**
* Advance the iterator to the boundary following the current boundary.
* @return The character index of the next text boundary or DONE if all
* boundaries have been returned.
* @stable ICU 2.0
*/
virtual int32_t next(void) = 0;
/**
* Return character index of the current iterator position within the text.
* @return The boundary most recently returned.
* @stable ICU 2.0
*/
virtual int32_t current(void) const = 0;
/**
* Advance the iterator to the first boundary following the specified offset.
* The value returned is always greater than the offset or
* the value BreakIterator.DONE
* @param offset the offset to begin scanning.
* @return The first boundary after the specified offset.
* @stable ICU 2.0
*/
virtual int32_t following(int32_t offset) = 0;
/**
* Set the iterator position to the first boundary preceding the specified offset.
* The value returned is always smaller than the offset or
* the value BreakIterator.DONE
* @param offset the offset to begin scanning.
* @return The first boundary before the specified offset.
* @stable ICU 2.0
*/
virtual int32_t preceding(int32_t offset) = 0;
/**
* Return true if the specified position is a boundary position.
* As a side effect, the current position of the iterator is set
* to the first boundary position at or following the specified offset.
* @param offset the offset to check.
* @return True if "offset" is a boundary position.
* @stable ICU 2.0
*/
virtual UBool isBoundary(int32_t offset) = 0;
/**
* Set the iterator position to the nth boundary from the current boundary
* @param n the number of boundaries to move by. A value of 0
* does nothing. Negative values move to previous boundaries
* and positive values move to later boundaries.
* @return The new iterator position, or
* DONE if there are fewer than |n| boundaries in the specified direction.
* @stable ICU 2.0
*/
virtual int32_t next(int32_t n) = 0;
/**
* For RuleBasedBreakIterators, return the status tag from the break rule
* that determined the boundary at the current iteration position.
* <p>
* For break iterator types that do not support a rule status,
* a default value of 0 is returned.
* <p>
* @return the status from the break rule that determined the boundary at
* the current iteration position.
* @see RuleBasedBreakIterator::getRuleStatus()
* @see UWordBreak
* @stable ICU 52
*/
virtual int32_t getRuleStatus() const;
/**
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
* that determined the boundary at the current iteration position.
* <p>
* For break iterator types that do not support rule status,
* no values are returned.
* <p>
* The returned status value(s) are stored into an array provided by the caller.
* The values are stored in sorted (ascending) order.
* If the capacity of the output array is insufficient to hold the data,
* the output will be truncated to the available length, and a
* U_BUFFER_OVERFLOW_ERROR will be signaled.
* <p>
* @see RuleBasedBreakIterator::getRuleStatusVec
*
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the boundary at the current iteration position.
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
* is the total number of status values that were available,
* not the reduced number that were actually returned.
* @see getRuleStatus
* @stable ICU 52
*/
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
/**
* Create BreakIterator for word-breaks using the given locale.
* Returns an instance of a BreakIterator implementing word breaks.
* WordBreak is useful for word selection (ex. double click)
* @param where the locale.
* @param status the error code
* @return A BreakIterator for word-breaks. The UErrorCode& status
* parameter is used to return status information to the user.
* To check whether the construction succeeded or not, you should check
* the value of U_SUCCESS(err). If you wish more detailed information, you
* can check for informational error results which still indicate success.
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
* used; neither the requested locale nor any of its fall back locales
* could be found.
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
createWordInstance(const Locale& where, UErrorCode& status);
/**
* Create BreakIterator for line-breaks using specified locale.
* Returns an instance of a BreakIterator implementing line breaks. Line
* breaks are logically possible line breaks, actual line breaks are
* usually determined based on display width.
* LineBreak is useful for word wrapping text.
* @param where the locale.
* @param status The error code.
* @return A BreakIterator for line-breaks. The UErrorCode& status
* parameter is used to return status information to the user.
* To check whether the construction succeeded or not, you should check
* the value of U_SUCCESS(err). If you wish more detailed information, you
* can check for informational error results which still indicate success.
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
* used; neither the requested locale nor any of its fall back locales
* could be found.
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
createLineInstance(const Locale& where, UErrorCode& status);
/**
* Create BreakIterator for character-breaks using specified locale
* Returns an instance of a BreakIterator implementing character breaks.
* Character breaks are boundaries of combining character sequences.
* @param where the locale.
* @param status The error code.
* @return A BreakIterator for character-breaks. The UErrorCode& status
* parameter is used to return status information to the user.
* To check whether the construction succeeded or not, you should check
* the value of U_SUCCESS(err). If you wish more detailed information, you
* can check for informational error results which still indicate success.
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
* used; neither the requested locale nor any of its fall back locales
* could be found.
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
createCharacterInstance(const Locale& where, UErrorCode& status);
/**
* Create BreakIterator for sentence-breaks using specified locale
* Returns an instance of a BreakIterator implementing sentence breaks.
* @param where the locale.
* @param status The error code.
* @return A BreakIterator for sentence-breaks. The UErrorCode& status
* parameter is used to return status information to the user.
* To check whether the construction succeeded or not, you should check
* the value of U_SUCCESS(err). If you wish more detailed information, you
* can check for informational error results which still indicate success.
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
* used; neither the requested locale nor any of its fall back locales
* could be found.
* The caller owns the returned object and is responsible for deleting it.
* @stable ICU 2.0
*/
static BreakIterator* U_EXPORT2
createSentenceInstance(const Locale& where, UErrorCode& status);
#ifndef U_HIDE_DEPRECATED_API
/**
* Create BreakIterator for title-casing breaks using the specified locale
* Returns an instance of a BreakIterator implementing title breaks.
* The iterator returned locates title boundaries as described for
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
* please use a word boundary iterator. See {@link #createWordInstance }.
*
* @param where the locale.
* @param status The error code.
* @return A BreakIterator for title-breaks. The UErrorCode& status
* parameter is used to return status information to the user.
* To check whether the construction succeeded or not, you should check
* the value of U_SUCCESS(err). If you wish more detailed information, you
* can check for informational error results which still indicate success.
* U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
* example, 'de_CH' was requested, but nothing was found there, so 'de' was
* used. U_USING_DEFAULT_WARNING indicates that the default locale data was
* used; neither the requested locale nor any of its fall back locales
* could be found.
* The caller owns the returned object and is responsible for deleting it.
* @deprecated ICU 64 Use createWordInstance instead.
*/
static BreakIterator* U_EXPORT2
createTitleInstance(const Locale& where, UErrorCode& status);
#endif /* U_HIDE_DEPRECATED_API */
/**
* Get the set of Locales for which TextBoundaries are installed.
* <p><b>Note:</b> this will not return locales added through the register
* call. To see the registered locales too, use the getAvailableLocales
* function that returns a StringEnumeration object </p>
* @param count the output parameter of number of elements in the locale list
* @return available locales
* @stable ICU 2.0
*/
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/**
* Get name of the object for the desired Locale, in the desired language.
* @param objectLocale must be from getAvailableLocales.
* @param displayLocale specifies the desired locale for output.
* @param name the fill-in parameter of the return value
* Uses best match.
* @return user-displayable name
* @stable ICU 2.0
*/
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
const Locale& displayLocale,
UnicodeString& name);
/**
* Get name of the object for the desired Locale, in the language of the
* default locale.
* @param objectLocale must be from getAvailableLocales
* @param name the fill-in parameter of the return value
* @return user-displayable name
* @stable ICU 2.0
*/
static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
UnicodeString& name);
#ifndef U_FORCE_HIDE_DEPRECATED_API
/**
* Deprecated functionality. Use clone() instead.
*
* Thread safe client-buffer-based cloning operation
* Do NOT call delete on a safeclone, since 'new' is not used to create it.
* @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
* @param BufferSize reference to size of allocated space.
* If BufferSize == 0, a sufficient size for use in cloning will
* be returned ('pre-flighting')
* If BufferSize is not enough for a stack-based safe clone,
* new memory will be allocated.
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
* necessary.
* @return pointer to the new clone
*
* @deprecated ICU 52. Use clone() instead.
*/
virtual BreakIterator * createBufferClone(void *stackBuffer,
int32_t &BufferSize,
UErrorCode &status) = 0;
#endif // U_FORCE_HIDE_DEPRECATED_API
#ifndef U_HIDE_DEPRECATED_API
/**
* Determine whether the BreakIterator was created in user memory by
* createBufferClone(), and thus should not be deleted. Such objects
* must be closed by an explicit call to the destructor (not delete).
* @deprecated ICU 52. Always delete the BreakIterator.
*/
inline UBool isBufferClone(void);
#endif /* U_HIDE_DEPRECATED_API */
#if !UCONFIG_NO_SERVICE
/**
* Register a new break iterator of the indicated kind, to use in the given locale.
* The break iterator will be adopted. Clones of the iterator will be returned
* if a request for a break iterator of the given kind matches or falls back to
* this locale.
* Because ICU may choose to cache BreakIterators internally, this must
* be called at application startup, prior to any calls to
* BreakIterator::createXXXInstance to avoid undefined behavior.
* @param toAdopt the BreakIterator instance to be adopted
* @param locale the Locale for which this instance is to be registered
* @param kind the type of iterator for which this instance is to be registered
* @param status the in/out status code, no special meanings are assigned
* @return a registry key that can be used to unregister this instance
* @stable ICU 2.4
*/
static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
const Locale& locale,
UBreakIteratorType kind,
UErrorCode& status);
/**
* Unregister a previously-registered BreakIterator using the key returned from the
* register call. Key becomes invalid after a successful call and should not be used again.
* The BreakIterator corresponding to the key will be deleted.
* Because ICU may choose to cache BreakIterators internally, this should
* be called during application shutdown, after all calls to
* BreakIterator::createXXXInstance to avoid undefined behavior.
* @param key the registry key returned by a previous call to registerInstance
* @param status the in/out status code, no special meanings are assigned
* @return true if the iterator for the key was successfully unregistered
* @stable ICU 2.4
*/
static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
/**
* Return a StringEnumeration over the locales available at the time of the call,
* including registered locales.
* @return a StringEnumeration over the locales available at the time of the call
* @stable ICU 2.4
*/
static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
#endif
/**
* Returns the locale for this break iterator. Two flavors are available: valid and
* actual locale.
* @stable ICU 2.8
*/
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
#ifndef U_HIDE_INTERNAL_API
/** Get the locale for this break iterator object. You can choose between valid and actual locale.
* @param type type of the locale we're looking for (valid or actual)
* @param status error code for the operation
* @return the locale
* @internal
*/
const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
#endif /* U_HIDE_INTERNAL_API */
/**
* Set the subject text string upon which the break iterator is operating
* without changing any other aspect of the matching state.
* The new and previous text strings must have the same content.
*
* This function is intended for use in environments where ICU is operating on
* strings that may move around in memory. It provides a mechanism for notifying
* ICU that the string has been relocated, and providing a new UText to access the
* string in its new position.
*
* Note that the break iterator implementation never copies the underlying text
* of a string being processed, but always operates directly on the original text
* provided by the user. Refreshing simply drops the references to the old text
* and replaces them with references to the new.
*
* Caution: this function is normally used only by very specialized,
* system-level code. One example use case is with garbage collection that moves
* the text in memory.
*
* @param input The new (moved) text string.
* @param status Receives errors detected by this function.
* @return *this
*
* @stable ICU 49
*/
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
private:
// Private static factory helpers; only declared here, their definitions are not part of this header.
// The two friend classes below are granted access to them.
// NOTE(review): presumably these back the public createXXXInstance() functions - confirm in brkiter.cpp.
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
friend class ICUBreakIteratorFactory;
friend class ICUBreakIteratorService;
protected:
// Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
// or else the compiler will create public ones.
/** @internal */
BreakIterator();
/** @internal */
BreakIterator (const BreakIterator &other);
#ifndef U_HIDE_INTERNAL_API
/** @internal */
BreakIterator (const Locale& valid, const Locale &actual);
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
BreakIterator &operator = (const BreakIterator &other);
#endif /* U_HIDE_INTERNAL_API */
private:
// Fixed-size character buffers of ULOC_FULLNAME_CAPACITY bytes each.
// NOTE(review): presumably the locale IDs reported by getLocale()/getLocaleID() - confirm in brkiter.cpp.
/** @internal (private) */
char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY];
};
#ifndef U_HIDE_DEPRECATED_API
// Deprecated query kept for source compatibility: this inline definition
// unconditionally reports that the iterator is not a buffer clone.
inline UBool BreakIterator::isBufferClone() { return false; }
#endif /* U_HIDE_DEPRECATED_API */
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // BRKITER_H
//eof

View File

@@ -0,0 +1,307 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Copyright (C) 2009-2012, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sanjay@google.com (Sanjay Ghemawat)
//
// Abstract interface that consumes a sequence of bytes (ByteSink).
//
// Used so that we can write a single piece of code that can operate
// on a variety of output string types.
//
// Various implementations of this interface are provided:
// ByteSink:
// CheckedArrayByteSink Write to a flat array, with bounds checking
// StringByteSink Write to an STL string
// This code is a contribution of Google code, and the style used here is
// a compromise between the original Google code and the ICU coding guidelines.
// For example, data types are ICU-ified (size_t,int->int32_t),
// and API comments doxygen-ified, but function names and behavior are
// as in the original, if possible.
// Assertion-style error handling, not available in ICU, was changed to
// parameter "pinning" similar to UnicodeString.
//
// In addition, this is only a partial port of the original Google code,
// limited to what was needed so far. The (nearly) complete original code
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
// (see ICU ticket 6765, r25517).
#ifndef __BYTESTREAM_H__
#define __BYTESTREAM_H__
/**
* \file
* \brief C++ API: Interface for writing bytes, and implementation classes.
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
#include "unicode/std_string.h"
U_NAMESPACE_BEGIN
/**
* A ByteSink can be filled with bytes.
* @stable ICU 4.2
*/
class U_COMMON_API ByteSink : public UMemory {
public:
/**
* Default constructor.
* @stable ICU 4.2
*/
ByteSink() { }
/**
* Virtual destructor.
* @stable ICU 4.2
*/
virtual ~ByteSink();
/**
* Append "bytes[0,n-1]" to this.
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 4.2
*/
virtual void Append(const char* bytes, int32_t n) = 0;
/**
* Appends n bytes to this. Same as Append().
* Call AppendU8() with u8"string literals" which are const char * in C++11
* but const char8_t * in C++20.
* If the compiler does support char8_t as a distinct type,
* then an AppendU8() overload for that is defined and will be chosen.
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 67
*/
inline void AppendU8(const char* bytes, int32_t n) {
Append(bytes, n);
}
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
* Call AppendU8() with u8"string literals" which are const char * in C++11
* but const char8_t * in C++20.
* If the compiler does support char8_t as a distinct type,
* then this AppendU8() overload for that is defined and will be chosen.
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 67
*/
inline void AppendU8(const char8_t* bytes, int32_t n) {
// Forward to the char-based Append(); the pointer is only reinterpreted, no bytes are copied here.
Append(reinterpret_cast<const char*>(bytes), n);
}
#endif
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* *result_capacity. Guarantees *result_capacity>=min_capacity.
* May return a pointer to the caller-owned scratch buffer which must have
* scratch_capacity>=min_capacity.
* The returned buffer is only valid until the next operation
* on this ByteSink.
*
* After writing at most *result_capacity bytes, call Append() with the
* pointer returned from this function and the number of bytes written.
* Many Append() implementations will avoid copying bytes if this function
* returned an internal buffer.
*
* Partial usage example:
* int32_t capacity;
* char* buffer = sink->GetAppendBuffer(..., &capacity);
* ... Write n bytes into buffer, with n <= capacity.
* sink->Append(buffer, n);
* In many implementations, that call to Append will avoid copying bytes.
*
* If the ByteSink allocates or reallocates an internal buffer, it should use
* the desired_capacity_hint if appropriate.
* If a caller cannot provide a reasonable guess at the desired capacity,
* it should pass desired_capacity_hint=0.
*
* If a non-scratch buffer is returned, the caller may only pass
* a prefix to it to Append().
* That is, it is not correct to pass an interior pointer to Append().
*
* The default implementation always returns the scratch buffer.
*
* @param min_capacity required minimum capacity of the returned buffer;
* must be non-negative
* @param desired_capacity_hint desired capacity of the returned buffer;
* must be non-negative
* @param scratch default caller-owned buffer
* @param scratch_capacity capacity of the scratch buffer
* @param result_capacity pointer to an integer which will be set to the
* capacity of the returned buffer
* @return a buffer with *result_capacity>=min_capacity
* @stable ICU 4.2
*/
virtual char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch, int32_t scratch_capacity,
int32_t* result_capacity);
/**
* Flush internal buffers.
* Some byte sinks use internal buffers or provide buffering
* and require calling Flush() at the end of the stream.
* The ByteSink should be ready for further Append() calls after Flush().
* The default implementation of Flush() does nothing.
* @stable ICU 4.2
*/
virtual void Flush();
private:
// Copying is disabled: copy construction and copy assignment are both deleted.
ByteSink(const ByteSink &) = delete;
ByteSink &operator=(const ByteSink &) = delete;
};
// -------------------------------------------------------------
// Some standard implementations
/**
* Implementation of ByteSink that writes to a flat byte array,
* with bounds-checking:
* This sink will not write more than capacity bytes to outbuf.
* If more than capacity bytes are Append()ed, then excess bytes are ignored,
* and Overflowed() will return true.
* Overflow does not cause a runtime error.
* @stable ICU 4.2
*/
class U_COMMON_API CheckedArrayByteSink : public ByteSink {
public:
  /**
   * Constructs a ByteSink that will write to outbuf[0..capacity-1].
   * @param outbuf buffer to write to
   * @param capacity size of the buffer
   * @stable ICU 4.2
   */
  CheckedArrayByteSink(char* outbuf, int32_t capacity);
  /**
   * Destructor.
   * @stable ICU 4.2
   */
  virtual ~CheckedArrayByteSink();
  /**
   * Returns the sink to its original state, without modifying the buffer.
   * Useful for reusing both the buffer and the sink for multiple streams.
   * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
   * and Overflowed()=false.
   * @return *this
   * @stable ICU 4.6
   */
  virtual CheckedArrayByteSink& Reset();
  /**
   * Append "bytes[0,n-1]" to this.
   * Overrides ByteSink::Append().
   * @param bytes the pointer to the bytes
   * @param n the number of bytes; must be non-negative
   * @stable ICU 4.2
   */
  virtual void Append(const char* bytes, int32_t n) override;
  /**
   * Returns a writable buffer for appending and writes the buffer's capacity to
   * *result_capacity. For details see the base class documentation.
   * Overrides ByteSink::GetAppendBuffer().
   * @param min_capacity required minimum capacity of the returned buffer;
   *                     must be non-negative
   * @param desired_capacity_hint desired capacity of the returned buffer;
   *                              must be non-negative
   * @param scratch default caller-owned buffer
   * @param scratch_capacity capacity of the scratch buffer
   * @param result_capacity pointer to an integer which will be set to the
   *                        capacity of the returned buffer
   * @return a buffer with *result_capacity>=min_capacity
   * @stable ICU 4.2
   */
  virtual char* GetAppendBuffer(int32_t min_capacity,
                                int32_t desired_capacity_hint,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity) override;
  /**
   * Returns the number of bytes actually written to the sink.
   * @return number of bytes written to the buffer
   * @stable ICU 4.2
   */
  int32_t NumberOfBytesWritten() const { return size_; }
  /**
   * Returns true if any bytes were discarded, i.e., if there was an
   * attempt to write more than 'capacity' bytes.
   * @return true if more than 'capacity' bytes were Append()ed
   * @stable ICU 4.2
   */
  UBool Overflowed() const { return overflowed_; }
  /**
   * Returns the number of bytes appended to the sink.
   * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
   * else they return the same number.
   * @return number of bytes appended to the sink
   * @stable ICU 4.6
   */
  int32_t NumberOfBytesAppended() const { return appended_; }
private:
  char* outbuf_;             // destination buffer passed to the constructor
  const int32_t capacity_;   // size of outbuf_, fixed at construction
  int32_t size_;             // value reported by NumberOfBytesWritten()
  int32_t appended_;         // value reported by NumberOfBytesAppended()
  UBool overflowed_;         // value reported by Overflowed()

  // Not default-constructible and not copyable.
  CheckedArrayByteSink() = delete;
  CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
  CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
};
/**
 * ByteSink implementation that forwards every appended byte to a "string".
 * StringClass is usually std::string; the members used here are
 * append(), capacity(), length() and reserve().
 * @stable ICU 4.2
 */
template<typename StringClass>
class StringByteSink : public ByteSink {
public:
    /**
     * Creates a sink that appends to the given string object.
     * @param dest pointer to string object to append to
     * @stable ICU 4.2
     */
    StringByteSink(StringClass* dest) : dest_(dest) { }

    /**
     * Creates a sink that appends to the given string object and
     * first makes sure the string has room for additional bytes.
     *
     * @param dest pointer to string object to append to
     * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
     * @stable ICU 60
     */
    StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
        if (initialAppendCapacity <= 0) {
            return;  // nothing was requested
        }
        // Room that is already allocated but not yet used by the string.
        const auto spareCapacity = dest->capacity() - dest->length();
        if (static_cast<uint32_t>(initialAppendCapacity) > spareCapacity) {
            dest->reserve(dest->length() + initialAppendCapacity);
        }
    }

    /**
     * Appends "data[0,n-1]" to the destination string.
     * @param data the pointer to the bytes
     * @param n the number of bytes; must be non-negative
     * @stable ICU 4.2
     */
    virtual void Append(const char* data, int32_t n) {
        dest_->append(data, n);
    }

private:
    StringClass* dest_;

    // Neither default-constructible nor copyable.
    StringByteSink() = delete;
    StringByteSink(const StringByteSink &) = delete;
    StringByteSink &operator=(const StringByteSink &) = delete;
};
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __BYTESTREAM_H__

View File

@@ -0,0 +1,313 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// char16ptr.h
// created: 2017feb28 Markus W. Scherer
#ifndef __CHAR16PTR_H__
#define __CHAR16PTR_H__
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
/**
* \file
* \brief C++ API: char16_t pointer wrappers with
* implicit conversion from bit-compatible raw pointer types.
* Also conversion functions from char16_t * to UChar * and OldUChar *.
*/
U_NAMESPACE_BEGIN
/**
* \def U_ALIASING_BARRIER
* Barrier for pointer anti-aliasing optimizations even across function boundaries.
* @internal
*/
#ifdef U_ALIASING_BARRIER
// Use the predefined value.
#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
// clang/GCC: an empty extended-asm statement (no instructions) that takes ptr
// as an input operand and lists "memory" as clobbered.
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
#elif defined(U_IN_DOXYGEN)
// Documentation builds: defined, but expands to nothing.
# define U_ALIASING_BARRIER(ptr)
#endif
// In every other configuration the macro stays undefined; the code in this
// header tests it with #ifdef and uses a union-based fallback in that case.
/**
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @stable ICU 59
*/
class U_COMMON_API Char16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* The constructor is not explicit, so a char16_t * converts implicitly.
* @param p pointer
* @stable ICU 59
*/
inline Char16Ptr(char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* (Only declared if U_CHAR16_IS_TYPEDEF is 0.)
* @param p pointer to be converted
* @stable ICU 59
*/
inline Char16Ptr(uint16_t *p);
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Converts the pointer to char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
* @stable ICU 59
*/
inline Char16Ptr(wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
* @stable ICU 59
*/
inline Char16Ptr(std::nullptr_t p);
/**
* Destructor.
* When U_ALIASING_BARRIER is defined, the inline implementation applies
* the barrier to the wrapped pointer; otherwise it does nothing.
* @stable ICU 59
*/
inline ~Char16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
* @stable ICU 59
*/
inline char16_t *get() const;
/**
* char16_t pointer access via type conversion (e.g., static_cast).
* Forwards to get().
* @return the wrapped pointer
* @stable ICU 59
*/
inline operator char16_t *() const { return get(); }
private:
// No default construction: every constructor takes the pointer to wrap.
Char16Ptr() = delete;
#ifdef U_ALIASING_BARRIER
// Barrier configuration: apply U_ALIASING_BARRIER to the incoming pointer,
// then reinterpret it as char16_t *.
template<typename T> static char16_t *cast(T *t) {
U_ALIASING_BARRIER(t);
return reinterpret_cast<char16_t *>(t);
}
// The wrapped pointer.
char16_t *p_;
#else
// Fallback when no barrier macro is defined: each inline constructor stores
// through the union member that matches its parameter type, and get()
// reads the pointer back through cp.
union {
char16_t *cp;
uint16_t *up;
wchar_t *wp;
} u_;
#endif
};
/// \cond
#ifdef U_ALIASING_BARRIER
// Barrier configuration: the wrapped pointer lives in p_.
// A char16_t * is stored as is.
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
// uint16_t * goes through cast(): barrier first, then reinterpret_cast.
Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
// wchar_t * goes through cast() as well.
Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
#endif
Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
Char16Ptr::~Char16Ptr() {
// Apply the barrier once more, this time to the wrapped pointer.
U_ALIASING_BARRIER(p_);
}
char16_t *Char16Ptr::get() const { return p_; }
#else
// Fallback configuration: the pointer is written to the union member of the
// matching type and always read back through u_.cp.
Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
#endif
Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
// Nothing to do without a barrier macro.
Char16Ptr::~Char16Ptr() {}
char16_t *Char16Ptr::get() const { return u_.cp; }
#endif
/// \endcond
/**
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @stable ICU 59
*/
class U_COMMON_API ConstChar16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* The constructor is not explicit, so a const char16_t * converts implicitly.
* @param p pointer
* @stable ICU 59
*/
inline ConstChar16Ptr(const char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to const char16_t *.
* (Only declared if U_CHAR16_IS_TYPEDEF is 0.)
* @param p pointer to be converted
* @stable ICU 59
*/
inline ConstChar16Ptr(const uint16_t *p);
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Converts the pointer to const char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
* @stable ICU 59
*/
inline ConstChar16Ptr(const wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
* @stable ICU 59
*/
inline ConstChar16Ptr(const std::nullptr_t p);
/**
* Destructor.
* When U_ALIASING_BARRIER is defined, the inline implementation applies
* the barrier to the wrapped pointer; otherwise it does nothing.
* @stable ICU 59
*/
inline ~ConstChar16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
* @stable ICU 59
*/
inline const char16_t *get() const;
/**
* const char16_t pointer access via type conversion (e.g., static_cast).
* Forwards to get().
* @return the wrapped pointer
* @stable ICU 59
*/
inline operator const char16_t *() const { return get(); }
private:
// No default construction: every constructor takes the pointer to wrap.
ConstChar16Ptr() = delete;
#ifdef U_ALIASING_BARRIER
// Barrier configuration: apply U_ALIASING_BARRIER to the incoming pointer,
// then reinterpret it as const char16_t *.
template<typename T> static const char16_t *cast(const T *t) {
U_ALIASING_BARRIER(t);
return reinterpret_cast<const char16_t *>(t);
}
// The wrapped pointer.
const char16_t *p_;
#else
// Fallback when no barrier macro is defined: each inline constructor stores
// through the union member that matches its parameter type, and get()
// reads the pointer back through cp.
union {
const char16_t *cp;
const uint16_t *up;
const wchar_t *wp;
} u_;
#endif
};
/// \cond
#ifdef U_ALIASING_BARRIER
// Barrier configuration: the wrapped pointer lives in p_.
// A const char16_t * is stored as is.
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
// const uint16_t * goes through cast(): barrier first, then reinterpret_cast.
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
// const wchar_t * goes through cast() as well.
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
#endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
ConstChar16Ptr::~ConstChar16Ptr() {
// Apply the barrier once more, this time to the wrapped pointer.
U_ALIASING_BARRIER(p_);
}
const char16_t *ConstChar16Ptr::get() const { return p_; }
#else
// Fallback configuration: the pointer is written to the union member of the
// matching type and always read back through u_.cp.
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
#endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
// Nothing to do without a barrier macro.
ConstChar16Ptr::~ConstChar16Ptr() {}
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif
/// \endcond
/**
 * Reinterprets a const char16_t pointer as a const UChar pointer.
 * If U_ALIASING_BARRIER is defined, it is applied to p before the cast.
 * @param p pointer
 * @return p as const UChar *
 * @stable ICU 59
 */
inline const UChar *toUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
    U_ALIASING_BARRIER(p);
#endif
    const UChar *const asUChar = reinterpret_cast<const UChar *>(p);
    return asUChar;
}
/**
 * Reinterprets a char16_t pointer as a UChar pointer.
 * If U_ALIASING_BARRIER is defined, it is applied to p before the cast.
 * @param p pointer
 * @return p as UChar *
 * @stable ICU 59
 */
inline UChar *toUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER
    U_ALIASING_BARRIER(p);
#endif
    UChar *const asUChar = reinterpret_cast<UChar *>(p);
    return asUChar;
}
/**
 * Reinterprets a const char16_t pointer as a const OldUChar pointer.
 * If U_ALIASING_BARRIER is defined, it is applied to p before the cast.
 * @param p pointer
 * @return p as const OldUChar *
 * @stable ICU 59
 */
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
    U_ALIASING_BARRIER(p);
#endif
    const OldUChar *const asOldUChar = reinterpret_cast<const OldUChar *>(p);
    return asOldUChar;
}
/**
 * Reinterprets a char16_t pointer as an OldUChar pointer.
 * If U_ALIASING_BARRIER is defined, it is applied to p before the cast.
 * @param p pointer
 * @return p as OldUChar *
 * @stable ICU 59
 */
inline OldUChar *toOldUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER
    U_ALIASING_BARRIER(p);
#endif
    OldUChar *const asOldUChar = reinterpret_cast<OldUChar *>(p);
    return asOldUChar;
}
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __CHAR16PTR_H__

View File

@@ -0,0 +1,734 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************
*
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
#ifndef CHARITER_H
#define CHARITER_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
#include "unicode/unistr.h"
/**
* \file
* \brief C++ API: Character Iterator
*/
U_NAMESPACE_BEGIN
/**
* Abstract class that defines an API for forward-only iteration
* on text objects.
* This is a minimal interface for iteration without random access
* or backwards iteration. It is especially useful for wrapping
* streams with converters into an object for collation or
* normalization.
*
* <p>Characters can be accessed in two ways: as code units or as
* code points.
* Unicode code points are 21-bit integers and are the scalar values
* of Unicode characters. ICU uses the type UChar32 for them.
* Unicode code units are the storage units of a given
* Unicode/UCS Transformation Format (a character encoding scheme).
* With UTF-16, all code points can be represented with either one
* or two code units ("surrogates").
* String storage is typically based on code units, while properties
* of characters are typically determined using code point values.
* Some processes may be designed to work with sequences of code units,
* or it may be known that all characters that are important to an
* algorithm can be represented with single code units.
* Other processes will need to use the code point access functions.</p>
*
* <p>ForwardCharacterIterator provides nextPostInc() to access
* a code unit and advance an internal position into the text object,
* similar to a <code>return text[position++]</code>.<br>
* It provides next32PostInc() to access a code point and advance an internal
* position.</p>
*
* <p>next32PostInc() assumes that the current position is that of
* the beginning of a code point, i.e., of its first code unit.
* After next32PostInc(), this will be true again.
* In general, access to code units and code points in the same
* iteration loop should not be mixed. In UTF-16, if the current position
* is on a second code unit (Low Surrogate), then only that code unit
* is returned even by next32PostInc().</p>
*
* <p>For iteration with either function, there are two ways to
* check for the end of the iteration. When there are no more
* characters in the text object:
* <ul>
* <li>The hasNext() function returns false.</li>
* <li>nextPostInc() and next32PostInc() return DONE
* when one attempts to read beyond the end of the text object.</li>
* </ul>
*
* Example:
* \code
* void function1(ForwardCharacterIterator &it) {
* UChar32 c;
* while(it.hasNext()) {
* c=it.next32PostInc();
* // use c
* }
* }
*
* void function2(ForwardCharacterIterator &it) {
* char16_t c;
* while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
* // use c
* }
* }
* \endcode
* </p>
*
* @stable ICU 2.0
*/
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
/**
* Value (0xffff) returned by most of ForwardCharacterIterator's functions
* when the iterator has reached the limits of its iteration.
* @stable ICU 2.0
*/
enum { DONE = 0xffff };
/**
* Destructor.
* @stable ICU 2.0
*/
virtual ~ForwardCharacterIterator();
/**
* Returns true when both iterators refer to the same
* character in the same character-storage object.
* @param that The ForwardCharacterIterator to be compared for equality
* @return true when both iterators refer to the same
* character in the same character-storage object
* @stable ICU 2.0
*/
virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
/**
* Returns true when the iterators refer to different
* text-storage objects, or to different characters in the
* same text-storage object.
* Implemented inline as the negation of operator==().
* @param that The ForwardCharacterIterator to be compared for inequality
* @return true when the iterators refer to different
* text-storage objects, or to different characters in the
* same text-storage object
* @stable ICU 2.0
*/
inline UBool operator!=(const ForwardCharacterIterator& that) const;
/**
* Generates a hash code for this iterator.
* @return the hash code.
* @stable ICU 2.0
*/
virtual int32_t hashCode(void) const = 0;
/**
* Returns a UClassID for this ForwardCharacterIterator ("poor man's
* RTTI").<P> Despite the fact that this function is public,
* DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
* @return a UClassID for this ForwardCharacterIterator
* @stable ICU 2.0
*/
virtual UClassID getDynamicClassID(void) const = 0;
/**
* Gets the current code unit for returning and advances to the next code unit
* in the iteration range
* (toward endIndex()). If there are
* no more code units to return, returns DONE.
* @return the current code unit.
* @stable ICU 2.0
*/
virtual char16_t nextPostInc(void) = 0;
/**
* Gets the current code point for returning and advances to the next code point
* in the iteration range
* (toward endIndex()). If there are
* no more code points to return, returns DONE.
* @return the current code point.
* @stable ICU 2.0
*/
virtual UChar32 next32PostInc(void) = 0;
/**
* Returns false if there are no more code units or code points
* at or after the current position in the iteration range.
* This is used with nextPostInc() or next32PostInc() in forward
* iteration.
* @returns false if there are no more code units or code points
* at or after the current position in the iteration range.
* @stable ICU 2.0
*/
virtual UBool hasNext() = 0;
protected:
/** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
ForwardCharacterIterator();
/** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
ForwardCharacterIterator(const ForwardCharacterIterator &other);
/**
* Assignment operator to be overridden in the implementing class.
* This class declares no data members, so the inline implementation
* copies nothing and just returns *this.
* @return *this
* @stable ICU 2.0
*/
ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
};
/**
* Abstract class that defines an API for iteration
* on text objects.
* This is an interface for forward and backward iteration
* and random access into a text object.
*
* <p>The API provides backward compatibility to the Java and older ICU
* CharacterIterator classes but extends them significantly:
* <ol>
* <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
* <li>While the old API functions provided forward iteration with
* "pre-increment" semantics, the new one also provides functions
* with "post-increment" semantics. They are more efficient and should
* be the preferred iterator functions for new implementations.
* The backward iteration always had "pre-decrement" semantics, which
* are efficient.</li>
* <li>Just like ForwardCharacterIterator, it provides access to
* both code units and code points. Code point access versions are available
* for the old and the new iteration semantics.</li>
* <li>There are new functions for setting and moving the current position
* without returning a character, for efficiency.</li>
* </ol>
*
* See ForwardCharacterIterator for examples for using the new forward iteration
* functions. For backward iteration, there is also a hasPrevious() function
* that can be used analogously to hasNext().
* The old functions work as before and are shown below.</p>
*
* <p>Examples for some of the new functions:</p>
*
* Forward iteration with hasNext():
* \code
* void forward1(CharacterIterator &it) {
* UChar32 c;
* for(it.setToStart(); it.hasNext();) {
* c=it.next32PostInc();
* // use c
* }
* }
* \endcode
* Forward iteration more similar to loops with the old forward iteration,
* showing a way to convert simple for() loops:
* \code
* void forward2(CharacterIterator &it) {
* char16_t c;
* for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
* // use c
* }
* }
* \endcode
* Backward iteration with setToEnd() and hasPrevious():
* \code
* void backward1(CharacterIterator &it) {
* UChar32 c;
* for(it.setToEnd(); it.hasPrevious();) {
* c=it.previous32();
* // use c
* }
* }
* \endcode
* Backward iteration with a more traditional for() loop:
* \code
* void backward2(CharacterIterator &it) {
* char16_t c;
* for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
* // use c
* }
* }
* \endcode
*
* Example for random access:
* \code
* void random(CharacterIterator &it) {
* // set to the third code point from the beginning
* it.move32(3, CharacterIterator::kStart);
* // get a code point from here without moving the position
* UChar32 c=it.current32();
* // get the position
* int32_t pos=it.getIndex();
* // get the previous code unit
* char16_t u=it.previous();
* // move back one more code unit
* it.move(-1, CharacterIterator::kCurrent);
* // set the position back to where it was
* // and read the same code point c and move beyond it
* it.setIndex(pos);
* if(c!=it.next32PostInc()) {
* exit(1); // CharacterIterator inconsistent
* }
* }
* \endcode
*
* <p>Examples, especially for the old API:</p>
*
* Function processing characters, in this example simple output
* <pre>
* \code
* void processChar( char16_t c )
* {
* cout << " " << c;
* }
* \endcode
* </pre>
* Traverse the text from start to finish
* <pre>
* \code
* void traverseForward(CharacterIterator& iter)
* {
* for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
* processChar(c);
* }
* }
* \endcode
* </pre>
* Traverse the text backwards, from end to start
* <pre>
* \code
* void traverseBackward(CharacterIterator& iter)
* {
* for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
* processChar(c);
* }
* }
* \endcode
* </pre>
* Traverse both forward and backward from a given position in the text.
* The isLetter()/isDigit() tests in this example stand in for whatever additional stopping criteria are needed.
* <pre>
* \code
* void traverseOut(CharacterIterator& iter, int32_t pos)
* {
* char16_t c;
* for (c = iter.setIndex(pos);
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
* c = iter.next()) {}
* int32_t end = iter.getIndex();
* for (c = iter.setIndex(pos);
* c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
* c = iter.previous()) {}
* int32_t start = iter.getIndex() + 1;
*
* cout << "start: " << start << " end: " << end << endl;
* for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
* processChar(c);
* }
* }
* \endcode
* </pre>
* Creating a StringCharacterIterator and calling the test functions
* <pre>
* \code
* void CharacterIterator_Example( void )
* {
* cout << endl << "===== CharacterIterator_Example: =====" << endl;
* UnicodeString text("Ein kleiner Satz.");
* StringCharacterIterator iterator(text);
* cout << "----- traverseForward: -----------" << endl;
* traverseForward( iterator );
* cout << endl << endl << "----- traverseBackward: ----------" << endl;
* traverseBackward( iterator );
* cout << endl << endl << "----- traverseOut: ---------------" << endl;
* traverseOut( iterator, 7 );
* cout << endl << endl << "-----" << endl;
* }
* \endcode
* </pre>
*
* @stable ICU 2.0
*/
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
/**
* Origin enumeration for the move() and move32() functions:
* kStart and kEnd are the start and end of the iteration range,
* kCurrent is the current position.
* @stable ICU 2.0
*/
enum EOrigin { kStart, kCurrent, kEnd };
/**
* Destructor.
* @stable ICU 2.0
*/
virtual ~CharacterIterator();
/**
* Returns a pointer to a new CharacterIterator of the same
* concrete class as this one, and referring to the same
* character in the same text-storage object as this one. The
* caller is responsible for deleting the new clone.
* @return a pointer to a new CharacterIterator
* @stable ICU 2.0
*/
virtual CharacterIterator* clone() const = 0;
/**
* Sets the iterator to refer to the first code unit in its
* iteration range, and returns that code unit.
* This can be used to begin an iteration with next().
* @return the first code unit in its iteration range.
* @stable ICU 2.0
*/
virtual char16_t first(void) = 0;
/**
* Sets the iterator to refer to the first code unit in its
* iteration range, returns that code unit, and moves the position
* to the second code unit. This is an alternative to setToStart()
* for forward iteration with nextPostInc().
* Unlike first(), this function is not declared pure virtual.
* @return the first code unit in its iteration range.
* @stable ICU 2.0
*/
virtual char16_t firstPostInc(void);
/**
* Sets the iterator to refer to the first code point in its
* iteration range, and returns that code point.
* This can be used to begin an iteration with next32().
* Note that an iteration with next32PostInc(), beginning with,
* e.g., setToStart() or firstPostInc(), is more efficient.
* @return the first code point in its iteration range.
* @stable ICU 2.0
*/
virtual UChar32 first32(void) = 0;
/**
* Sets the iterator to refer to the first code point in its
* iteration range, returns that code point, and moves the position
* to the second code point. This is an alternative to setToStart()
* for forward iteration with next32PostInc().
* Unlike first32(), this function is not declared pure virtual.
* @return the first code point in its iteration range.
* @stable ICU 2.0
*/
virtual UChar32 first32PostInc(void);
/**
* Sets the iterator to refer to the first code unit or code point in its
* iteration range. This can be used to begin a forward
* iteration with nextPostInc() or next32PostInc().
* Implemented inline as move(0, kStart).
* @return the start position of the iteration range
* @stable ICU 2.0
*/
inline int32_t setToStart();
/**
* Sets the iterator to refer to the last code unit in its
* iteration range, and returns that code unit.
* This can be used to begin an iteration with previous().
* @return the last code unit.
* @stable ICU 2.0
*/
virtual char16_t last(void) = 0;
/**
* Sets the iterator to refer to the last code point in its
* iteration range, and returns that code point.
* This can be used to begin an iteration with previous32().
* @return the last code point.
* @stable ICU 2.0
*/
virtual UChar32 last32(void) = 0;
/**
* Sets the iterator to the end of its iteration range, just behind
* the last code unit or code point. This can be used to begin a backward
* iteration with previous() or previous32().
* Implemented inline as move(0, kEnd).
* @return the end position of the iteration range
* @stable ICU 2.0
*/
inline int32_t setToEnd();
/**
* Sets the iterator to refer to the "position"-th code unit
* in the text-storage object the iterator refers to, and
* returns that code unit.
* @param position the "position"-th code unit in the text-storage object
* @return the "position"-th code unit.
* @stable ICU 2.0
*/
virtual char16_t setIndex(int32_t position) = 0;
/**
* Sets the iterator to refer to the beginning of the code point
* that contains the "position"-th code unit
* in the text-storage object the iterator refers to, and
* returns that code point.
* The current position is adjusted to the beginning of the code point
* (its first code unit).
* @param position the "position"-th code unit in the text-storage object
* @return the code point that contains the "position"-th code unit.
* @stable ICU 2.0
*/
virtual UChar32 setIndex32(int32_t position) = 0;
/**
* Returns the code unit the iterator currently refers to.
* @return the current code unit.
* @stable ICU 2.0
*/
virtual char16_t current(void) const = 0;
/**
* Returns the code point the iterator currently refers to.
* @return the current code point.
* @stable ICU 2.0
*/
virtual UChar32 current32(void) const = 0;
/**
* Advances to the next code unit in the iteration range
* (toward endIndex()), and returns that code unit. If there are
* no more code units to return, returns DONE.
* @return the next code unit.
* @stable ICU 2.0
*/
virtual char16_t next(void) = 0;
/**
* Advances to the next code point in the iteration range
* (toward endIndex()), and returns that code point. If there are
* no more code points to return, returns DONE.
* Note that iteration with "pre-increment" semantics is less
* efficient than iteration with "post-increment" semantics
* that is provided by next32PostInc().
* @return the next code point.
* @stable ICU 2.0
*/
virtual UChar32 next32(void) = 0;
/**
* Advances to the previous code unit in the iteration range
* (toward startIndex()), and returns that code unit. If there are
* no more code units to return, returns DONE.
* @return the previous code unit.
* @stable ICU 2.0
*/
virtual char16_t previous(void) = 0;
/**
* Advances to the previous code point in the iteration range
* (toward startIndex()), and returns that code point. If there are
* no more code points to return, returns DONE.
* @return the previous code point.
* @stable ICU 2.0
*/
virtual UChar32 previous32(void) = 0;
/**
* Returns false if there are no more code units or code points
* before the current position in the iteration range.
* This is used with previous() or previous32() in backward
* iteration.
* @return false if there are no more code units or code points
* before the current position in the iteration range, return true otherwise.
* @stable ICU 2.0
*/
virtual UBool hasPrevious() = 0;
/**
* Returns the numeric index in the underlying text-storage
* object of the character returned by first(). Since it's
* possible to create an iterator that iterates across only
* part of a text-storage object, this number isn't
* necessarily 0.
* Implemented inline; returns the protected field "begin".
* @returns the numeric index in the underlying text-storage
* object of the character returned by first().
* @stable ICU 2.0
*/
inline int32_t startIndex(void) const;
/**
* Returns the numeric index in the underlying text-storage
* object of the position immediately BEYOND the character
* returned by last().
* Implemented inline; returns the protected field "end".
* @return the numeric index in the underlying text-storage
* object of the position immediately BEYOND the character
* returned by last().
* @stable ICU 2.0
*/
inline int32_t endIndex(void) const;
/**
* Returns the numeric index in the underlying text-storage
* object of the character the iterator currently refers to
* (i.e., the character returned by current()).
* Implemented inline; returns the protected field "pos".
* @return the numeric index in the text-storage object of
* the character the iterator currently refers to
* @stable ICU 2.0
*/
inline int32_t getIndex(void) const;
/**
* Returns the length of the entire text in the underlying
* text-storage object.
* Implemented inline; returns the protected field "textLength".
* @return the length of the entire text in the text-storage object
* @stable ICU 2.0
*/
inline int32_t getLength() const;
/**
* Moves the current position relative to the start or end of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code units forward
* or backward by specifying a positive or negative delta.
* @param delta the position relative to origin. A positive delta means forward;
* a negative delta means backward.
* @param origin Origin enumeration {kStart, kCurrent, kEnd}
* @return the new position
* @stable ICU 2.0
*/
virtual int32_t move(int32_t delta, EOrigin origin) = 0;
/**
* Moves the current position relative to the start or end of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code points forward
* or backward by specifying a positive or negative delta.
* @param delta the position relative to origin. A positive delta means forward;
* a negative delta means backward.
* @param origin Origin enumeration {kStart, kCurrent, kEnd}
* @return the new position
* @stable ICU 2.0
*/
#ifdef move32
// One of the system headers right now is sometimes defining a conflicting macro we don't use
// Remove it so that the declaration below names a member function called move32.
#undef move32
#endif
virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
/**
* Copies the text under iteration into the UnicodeString
* referred to by "result".
* @param result Receives a copy of the text under iteration.
* @stable ICU 2.0
*/
virtual void getText(UnicodeString& result) = 0;
protected:
/**
* Empty constructor.
* @stable ICU 2.0
*/
CharacterIterator();
/**
* Constructor, just setting the length field in this base class.
* @param length the text length to store
* @stable ICU 2.0
*/
CharacterIterator(int32_t length);
/**
* Constructor, just setting the length and position fields in this base class.
* @param length the text length to store
* @param position the current position to store
* @stable ICU 2.0
*/
CharacterIterator(int32_t length, int32_t position);
/**
* Constructor, just setting the length, start, end, and position fields in this base class.
* @param length the text length to store
* @param textBegin the start of the iteration range
* @param textEnd the end of the iteration range
* @param position the current position to store
* @stable ICU 2.0
*/
CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
/**
* Copy constructor.
*
* @param that The CharacterIterator to be copied
* @stable ICU 2.0
*/
CharacterIterator(const CharacterIterator &that);
/**
* Assignment operator. Sets this CharacterIterator to have the same behavior,
* as the one passed in.
* @param that The CharacterIterator passed in.
* @return the newly set CharacterIterator.
* @stable ICU 2.0
*/
CharacterIterator &operator=(const CharacterIterator &that);
/**
* Base class text length field; returned by getLength().
* Necessary for correct getText() and hashCode().
* @stable ICU 2.0
*/
int32_t textLength;
/**
* Base class field for the current position; returned by getIndex().
* @stable ICU 2.0
*/
int32_t pos;
/**
* Base class field for the start of the iteration range; returned by startIndex().
* @stable ICU 2.0
*/
int32_t begin;
/**
* Base class field for the end of the iteration range; returned by endIndex().
* @stable ICU 2.0
*/
int32_t end;
};
// Inequality is the negation of the virtual operator==().
inline UBool
ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
    const UBool isEqual = this->operator==(that);
    return !isEqual;
}
// Moves by zero relative to the start of the iteration range and reports the position.
inline int32_t
CharacterIterator::setToStart() {
    const int32_t newPosition = this->move(0, CharacterIterator::kStart);
    return newPosition;
}
// Moves by zero relative to the end of the iteration range and reports the position.
inline int32_t
CharacterIterator::setToEnd() {
    const int32_t newPosition = this->move(0, CharacterIterator::kEnd);
    return newPosition;
}
// Accessor for the start of the iteration range.
inline int32_t
CharacterIterator::startIndex() const {
    return this->begin;
}
// Accessor for the end of the iteration range.
inline int32_t
CharacterIterator::endIndex() const {
    return this->end;
}
// Accessor for the current position.
inline int32_t
CharacterIterator::getIndex() const {
    return this->pos;
}
// Accessor for the stored text length.
inline int32_t
CharacterIterator::getLength() const {
    return this->textLength;
}
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif

View File

@@ -0,0 +1,595 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2009-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: localpointer.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009nov13
* created by: Markus W. Scherer
*/
#ifndef __LOCALPOINTER_H__
#define __LOCALPOINTER_H__
/**
* \file
* \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
*
* These classes are inspired by
* - std::auto_ptr
* - boost::scoped_ptr & boost::scoped_array
* - Taligent Safe Pointers (TOnlyPointerTo)
*
* but none of those provide for all of the goals for ICU smart pointers:
* - Smart pointer owns the object and releases it when it goes out of scope.
* - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
* - ICU-compatible: No exceptions.
* - Need to be able to orphan/release the pointer and its ownership.
* - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
*
* For details see http://site.icu-project.org/design/cpp/scoped_ptr
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include <memory>
U_NAMESPACE_BEGIN
/**
* "Smart pointer" base class; do not use directly: use LocalPointer etc.
*
* Base class for smart pointer classes that do not throw exceptions.
*
* Do not use this base class directly, since it does not delete its pointer.
* A subclass must implement methods that delete the pointer:
* Destructor and adoptInstead().
*
* There is no operator T *() provided because the programmer must decide
* whether to use getAlias() (without transfer of ownership) or orphan()
* (with transfer of ownership and NULLing of the pointer).
*
* @see LocalPointer
* @see LocalArray
* @see U_DEFINE_LOCAL_OPEN_POINTER
* @stable ICU 4.4
*/
template<typename T>
class LocalPointerBase {
public:
    // No heap allocation. Use only on the stack.
    static void* U_EXPORT2 operator new(size_t) = delete;
    static void* U_EXPORT2 operator new[](size_t) = delete;
#if U_HAVE_PLACEMENT_NEW
    static void* U_EXPORT2 operator new(size_t, void*) = delete;
#endif
    /**
     * Constructor takes ownership.
     * @param p simple pointer to an object that is adopted (may be null)
     * @stable ICU 4.4
     */
    explicit LocalPointerBase(T *p=nullptr) : ptr(p) {}
    /**
     * Destructor; the base class does NOT delete the pointer.
     * Subclass must provide its own destructor that releases the object.
     * @stable ICU 4.4
     */
    ~LocalPointerBase() { /* delete ptr; */ }
    /**
     * Null check.
     * @return true if the held pointer is null
     * @stable ICU 4.4
     */
    UBool isNull() const { return ptr==nullptr; }
    /**
     * Null check.
     * @return true if the held pointer is not null
     * @stable ICU 4.4
     */
    UBool isValid() const { return ptr!=nullptr; }
    /**
     * Comparison with a simple pointer, so that existing code
     * with ==NULL need not be changed.
     * @param other simple pointer for comparison
     * @return true if this pointer value equals other
     * @stable ICU 4.4
     */
    bool operator==(const T *other) const { return ptr==other; }
    /**
     * Comparison with a simple pointer, so that existing code
     * with !=NULL need not be changed.
     * @param other simple pointer for comparison
     * @return true if this pointer value differs from other
     * @stable ICU 4.4
     */
    bool operator!=(const T *other) const { return ptr!=other; }
    /**
     * Access without ownership change.
     * @return the pointer value
     * @stable ICU 4.4
     */
    T *getAlias() const { return ptr; }
    /**
     * Access without ownership change.
     * The held pointer is dereferenced without a null check.
     * @return the pointer value as a reference
     * @stable ICU 4.4
     */
    T &operator*() const { return *ptr; }
    /**
     * Access without ownership change.
     * @return the pointer value
     * @stable ICU 4.4
     */
    T *operator->() const { return ptr; }
    /**
     * Gives up ownership; the internal pointer becomes null.
     * @return the pointer value;
     *         caller becomes responsible for deleting the object
     * @stable ICU 4.4
     */
    T *orphan() {
        T *p=ptr;
        ptr=nullptr;
        return p;
    }
    /**
     * Adopts (takes ownership of) the pointer passed in.
     * The base class only overwrites the stored pointer; it does NOT delete
     * the previously held object. Subclass must provide a version that does.
     * @param p simple pointer to an object that is adopted
     * @stable ICU 4.4
     */
    void adoptInstead(T *p) {
        // delete ptr;
        ptr=p;
    }
protected:
    /**
     * Actual pointer.
     * @internal
     */
    T *ptr;
private:
    // Explicitly deleted (rather than declared-but-undefined) so that any use,
    // including from members, friends and subclasses, is a compile-time error
    // instead of a link-time error.
    // No comparison operators with other LocalPointerBases.
    bool operator==(const LocalPointerBase<T> &other) = delete;
    bool operator!=(const LocalPointerBase<T> &other) = delete;
    // No ownership sharing: No copy constructor, no assignment operator.
    LocalPointerBase(const LocalPointerBase<T> &other) = delete;
    void operator=(const LocalPointerBase<T> &other) = delete;
};
/**
* "Smart pointer" class, deletes objects via the standard C++ delete operator.
* For most methods see the LocalPointerBase base class.
*
* Usage example:
* \code
* LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
* int32_t length=s->length(); // 2
* char16_t lead=s->charAt(0); // 0xd900
* if(some condition) { return; } // no need to explicitly delete the pointer
* s.adoptInstead(new UnicodeString((char16_t)0xfffc));
* length=s->length(); // 1
* // no need to explicitly delete the pointer
* \endcode
*
* @see LocalPointerBase
* @stable ICU 4.4
*/
template<typename T>
class LocalPointer : public LocalPointerBase<T> {
public:
    using LocalPointerBase<T>::operator*;
    using LocalPointerBase<T>::operator->;
    /**
     * Constructor takes ownership.
     * @param p simple pointer to an object that is adopted
     * @stable ICU 4.4
     */
    explicit LocalPointer(T *p=nullptr) : LocalPointerBase<T>(p) {}
    /**
     * Constructor takes ownership and reports an error if NULL.
     *
     * This constructor is intended to be used with other-class constructors
     * that may report a failure UErrorCode,
     * so that callers need to check only for U_FAILURE(errorCode)
     * and not also separately for isNull().
     *
     * @param p simple pointer to an object that is adopted
     * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
     *     if p==NULL and no other failure code had been set
     * @stable ICU 55
     */
    LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
        if(p==nullptr && U_SUCCESS(errorCode)) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
    /**
     * Move constructor, leaves src with isNull().
     * @param src source smart pointer
     * @stable ICU 56
     */
    LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
        src.ptr=nullptr;
    }
    /**
     * Constructs a LocalPointer from a C++11 std::unique_ptr.
     * The LocalPointer steals the object owned by the std::unique_ptr.
     *
     * This constructor works via move semantics. If your std::unique_ptr is
     * in a local variable, you must use std::move.
     *
     * @param p The std::unique_ptr from which the pointer will be stolen.
     * @stable ICU 64
     */
    explicit LocalPointer(std::unique_ptr<T> &&p)
        : LocalPointerBase<T>(p.release()) {}
    /**
     * Destructor deletes the object it owns.
     * @stable ICU 4.4
     */
    ~LocalPointer() {
        delete LocalPointerBase<T>::ptr;
    }
    /**
     * Move assignment operator, leaves src with isNull().
     * The previously owned object is deleted.
     * If *this and src are the same object, the assignment is a no-op:
     * the owned object is kept and not deleted.
     * @param src source smart pointer
     * @return *this
     * @stable ICU 56
     */
    LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
        // Detach src before deleting our own object. When src is *this, our
        // pointer is null at the time of the delete, so the object survives.
        T *incoming=src.ptr;
        src.ptr=nullptr;
        delete LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=incoming;
        return *this;
    }
    /**
     * Move-assign from an std::unique_ptr to this LocalPointer.
     * Steals the pointer from the std::unique_ptr;
     * the previously owned object is deleted.
     *
     * @param p The std::unique_ptr from which the pointer will be stolen.
     * @return *this
     * @stable ICU 64
     */
    LocalPointer<T> &operator=(std::unique_ptr<T> &&p) U_NOEXCEPT {
        adoptInstead(p.release());
        return *this;
    }
    /**
     * Swap pointers.
     * @param other other smart pointer
     * @stable ICU 56
     */
    void swap(LocalPointer<T> &other) U_NOEXCEPT {
        T *temp=LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=other.ptr;
        other.ptr=temp;
    }
    /**
     * Non-member LocalPointer swap function.
     * @param p1 will get p2's pointer
     * @param p2 will get p1's pointer
     * @stable ICU 56
     */
    friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) U_NOEXCEPT {
        p1.swap(p2);
    }
    /**
     * Deletes the object it owns,
     * and adopts (takes ownership of) the one passed in.
     * @param p simple pointer to an object that is adopted;
     *          must not be the pointer that is already owned
     * @stable ICU 4.4
     */
    void adoptInstead(T *p) {
        delete LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=p;
    }
    /**
     * Deletes the object it owns,
     * and adopts (takes ownership of) the one passed in.
     *
     * If U_FAILURE(errorCode), then the current object is retained and the new one deleted.
     *
     * If U_SUCCESS(errorCode) but the input pointer is NULL,
     * then U_MEMORY_ALLOCATION_ERROR is set,
     * the current object is deleted, and NULL is set.
     *
     * @param p simple pointer to an object that is adopted
     * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
     *     if p==NULL and no other failure code had been set
     * @stable ICU 55
     */
    void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
        if(U_SUCCESS(errorCode)) {
            delete LocalPointerBase<T>::ptr;
            LocalPointerBase<T>::ptr=p;
            if(p==nullptr) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
            }
        } else {
            // A failure was already recorded: keep what we have, discard the newcomer.
            delete p;
        }
    }
    /**
     * Conversion operator to a C++11 std::unique_ptr.
     * Disowns the object and gives it to the returned std::unique_ptr.
     *
     * This operator works via move semantics. If your LocalPointer is
     * in a local variable, you must use std::move.
     *
     * @return An std::unique_ptr owning the pointer previously owned by this
     *         icu::LocalPointer.
     * @stable ICU 64
     */
    operator std::unique_ptr<T> () && {
        return std::unique_ptr<T>(LocalPointerBase<T>::orphan());
    }
};
/**
* "Smart pointer" class, deletes objects via the C++ array delete[] operator.
* For most methods see the LocalPointerBase base class.
* Adds operator[] for array item access.
*
* Usage example:
* \code
* LocalArray<UnicodeString> a(new UnicodeString[2]);
* a[0].append((char16_t)0x61);
* if(some condition) { return; } // no need to explicitly delete the array
* a.adoptInstead(new UnicodeString[4]);
* a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
* // no need to explicitly delete the array
* \endcode
*
* @see LocalPointerBase
* @stable ICU 4.4
*/
template<typename T>
class LocalArray : public LocalPointerBase<T> {
public:
    using LocalPointerBase<T>::operator*;
    using LocalPointerBase<T>::operator->;
    /**
     * Constructor takes ownership.
     * @param p simple pointer to an array of T objects that is adopted
     * @stable ICU 4.4
     */
    explicit LocalArray(T *p=nullptr) : LocalPointerBase<T>(p) {}
    /**
     * Constructor takes ownership and reports an error if NULL.
     *
     * This constructor is intended to be used with other-class constructors
     * that may report a failure UErrorCode,
     * so that callers need to check only for U_FAILURE(errorCode)
     * and not also separately for isNull().
     *
     * @param p simple pointer to an array of T objects that is adopted
     * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
     *     if p==NULL and no other failure code had been set
     * @stable ICU 56
     */
    LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
        if(p==nullptr && U_SUCCESS(errorCode)) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
    /**
     * Move constructor, leaves src with isNull().
     * @param src source smart pointer
     * @stable ICU 56
     */
    LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
        src.ptr=nullptr;
    }
    /**
     * Constructs a LocalArray from a C++11 std::unique_ptr of an array type.
     * The LocalArray steals the array owned by the std::unique_ptr.
     *
     * This constructor works via move semantics. If your std::unique_ptr is
     * in a local variable, you must use std::move.
     *
     * @param p The std::unique_ptr from which the array will be stolen.
     * @stable ICU 64
     */
    explicit LocalArray(std::unique_ptr<T[]> &&p)
        : LocalPointerBase<T>(p.release()) {}
    /**
     * Destructor deletes the array it owns.
     * @stable ICU 4.4
     */
    ~LocalArray() {
        delete[] LocalPointerBase<T>::ptr;
    }
    /**
     * Move assignment operator, leaves src with isNull().
     * The previously owned array is deleted.
     * If *this and src are the same object, the assignment is a no-op:
     * the owned array is kept and not deleted.
     * @param src source smart pointer
     * @return *this
     * @stable ICU 56
     */
    LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
        // Detach src before deleting our own array. When src is *this, our
        // pointer is null at the time of the delete[], so the array survives.
        T *incoming=src.ptr;
        src.ptr=nullptr;
        delete[] LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=incoming;
        return *this;
    }
    /**
     * Move-assign from an std::unique_ptr to this LocalArray.
     * Steals the array from the std::unique_ptr;
     * the previously owned array is deleted.
     *
     * @param p The std::unique_ptr from which the array will be stolen.
     * @return *this
     * @stable ICU 64
     */
    LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) U_NOEXCEPT {
        adoptInstead(p.release());
        return *this;
    }
    /**
     * Swap pointers.
     * @param other other smart pointer
     * @stable ICU 56
     */
    void swap(LocalArray<T> &other) U_NOEXCEPT {
        T *temp=LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=other.ptr;
        other.ptr=temp;
    }
    /**
     * Non-member LocalArray swap function.
     * @param p1 will get p2's pointer
     * @param p2 will get p1's pointer
     * @stable ICU 56
     */
    friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) U_NOEXCEPT {
        p1.swap(p2);
    }
    /**
     * Deletes the array it owns,
     * and adopts (takes ownership of) the one passed in.
     * @param p simple pointer to an array of T objects that is adopted;
     *          must not be the array that is already owned
     * @stable ICU 4.4
     */
    void adoptInstead(T *p) {
        delete[] LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=p;
    }
    /**
     * Deletes the array it owns,
     * and adopts (takes ownership of) the one passed in.
     *
     * If U_FAILURE(errorCode), then the current array is retained and the new one deleted.
     *
     * If U_SUCCESS(errorCode) but the input pointer is NULL,
     * then U_MEMORY_ALLOCATION_ERROR is set,
     * the current array is deleted, and NULL is set.
     *
     * @param p simple pointer to an array of T objects that is adopted
     * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
     *     if p==NULL and no other failure code had been set
     * @stable ICU 56
     */
    void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
        if(U_SUCCESS(errorCode)) {
            delete[] LocalPointerBase<T>::ptr;
            LocalPointerBase<T>::ptr=p;
            if(p==nullptr) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
            }
        } else {
            // A failure was already recorded: keep what we have, discard the newcomer.
            delete[] p;
        }
    }
    /**
     * Array item access (writable).
     * No index bounds check.
     * @param i array index
     * @return reference to the array item
     * @stable ICU 4.4
     */
    T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
    /**
     * Conversion operator to a C++11 std::unique_ptr.
     * Disowns the array and gives it to the returned std::unique_ptr.
     *
     * This operator works via move semantics. If your LocalArray is
     * in a local variable, you must use std::move.
     *
     * @return An std::unique_ptr owning the array previously owned by this
     *         icu::LocalArray.
     * @stable ICU 64
     */
    operator std::unique_ptr<T[]> () && {
        return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan());
    }
};
/**
* \def U_DEFINE_LOCAL_OPEN_POINTER
* "Smart pointer" definition macro, deletes objects via the closeFunction.
* Defines a subclass of LocalPointerBase which works just
* like LocalPointer<Type> except that this subclass will use the closeFunction
* rather than the C++ delete operator.
*
* Usage example:
* \code
* LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
* utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
* utf8Out, (int32_t)sizeof(utf8Out),
* utf8In, utf8InLength, &errorCode);
* if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
* \endcode
*
* @see LocalPointerBase
* @see LocalPointer
* @stable ICU 4.4
*/
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
    class LocalPointerClassName : public LocalPointerBase<Type> { \
    public: \
        using LocalPointerBase<Type>::operator*; \
        using LocalPointerBase<Type>::operator->; \
        /* Adopts p; a null pointer is allowed and is never passed to closeFunction. */ \
        explicit LocalPointerClassName(Type *p=nullptr) : LocalPointerBase<Type>(p) {} \
        /* Move constructor, leaves src with isNull(). */ \
        LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \
                : LocalPointerBase<Type>(src.ptr) { \
            src.ptr=nullptr; \
        } \
        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
        explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \
                : LocalPointerBase<Type>(p.release()) {} \
        /* Destructor closes the owned object, if any. */ \
        ~LocalPointerClassName() { if (ptr != nullptr) { closeFunction(ptr); } } \
        /* Move assignment: closes the previously owned object and leaves src with isNull(). */ \
        /* src is detached first, so self-move-assignment keeps the object open (no-op). */ \
        LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \
            Type *incoming=src.ptr; \
            src.ptr=nullptr; \
            if (ptr != nullptr) { closeFunction(ptr); } \
            ptr=incoming; \
            return *this; \
        } \
        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
        LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \
            adoptInstead(p.release()); \
            return *this; \
        } \
        /* Exchanges the owned pointers; nothing is closed. */ \
        void swap(LocalPointerClassName &other) U_NOEXCEPT { \
            Type *temp=LocalPointerBase<Type>::ptr; \
            LocalPointerBase<Type>::ptr=other.ptr; \
            other.ptr=temp; \
        } \
        friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
            p1.swap(p2); \
        } \
        /* Closes the owned object, if any, and adopts p instead. */ \
        void adoptInstead(Type *p) { \
            if (ptr != nullptr) { closeFunction(ptr); } \
            ptr=p; \
        } \
        /* Gives up ownership to a std::unique_ptr that uses closeFunction as its deleter. */ \
        operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \
            return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \
        } \
    }
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif /* __LOCALPOINTER_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,94 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2005, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 03/14/00 aliu Creation.
* 06/27/00 aliu Change from C++ class to C struct
**********************************************************************
*/
#ifndef PARSEERR_H
#define PARSEERR_H
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Parse Error Information
*/
/**
* The capacity of the context strings in UParseError,
* i.e. the number of UChar elements in each of the preContext and
* postContext arrays.
* @stable ICU 2.0
*/
enum { U_PARSE_CONTEXT_LEN = 16 };
/**
* A UParseError struct is used to return detailed information about
* parsing errors. It is used by ICU parsing engines that parse long
* rules, patterns, or programs, where the text being parsed is long
* enough that more information than a UErrorCode is needed to
* localize the error.
*
* <p>The line, offset, and context fields are optional; parsing
* engines may choose not to use them.
*
* <p>The preContext and postContext strings include some part of the
* context surrounding the error. If the source text is "let for=7"
* and "for" is the error (e.g., because it is a reserved word), then
* some examples of what a parser might produce are the following:
*
* <pre>
* preContext   postContext
* ""           ""            The parser does not support context
* "let "       "=7"          Pre- and post-context only
* "let "       "for=7"       Pre- and post-context and error text
* ""           "for"         Error text only
* </pre>
*
* <p>Examples of engines which use UParseError (or may use it in the
* future) are Transliterator, RuleBasedBreakIterator, and
* RegexPattern.
*
* @stable ICU 2.0
*/
typedef struct UParseError {
/**
* The line on which the error occurred. If the parser uses this
* field, it sets it to the line number of the source text line on
* which the error appears, which will be a value >= 1. If the
* parser does not support line numbers, the value will be <= 0.
* @stable ICU 2.0
*/
int32_t line;
/**
* The character offset to the error. If the line field is >= 1,
* then this is the offset from the start of the line. Otherwise,
* this is the offset from the start of the text. If the parser
* does not support this field, it will have a value < 0.
* @stable ICU 2.0
*/
int32_t offset;
/**
* Textual context before the error. Null-terminated. The empty
* string if not supported by parser.
* The buffer holds U_PARSE_CONTEXT_LEN UChars in total.
* @stable ICU 2.0
*/
UChar preContext[U_PARSE_CONTEXT_LEN];
/**
* The error itself and/or textual context after the error.
* Null-terminated. The empty string if not supported by parser.
* The buffer holds U_PARSE_CONTEXT_LEN UChars in total.
* @stable ICU 2.0
*/
UChar postContext[U_PARSE_CONTEXT_LEN];
} UParseError;
#endif

View File

@@ -0,0 +1,885 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
******************************************************************************
*/
#ifndef _PLATFORM_H
#define _PLATFORM_H
#include "unicode/uconfig.h"
#include "unicode/uvernum.h"
/**
* \file
* \brief Basic types for the platform.
*
* This file used to be generated by autoconf/configure.
* Starting with ICU 49, platform.h is a normal source file,
* to simplify cross-compiling and working with non-autoconf/make build systems.
*
* When a value in this file does not work on a platform, then please
* try to derive it from the U_PLATFORM value
* (for which we might need a new value constant in rare cases)
* and/or from other macros that are predefined by the compiler
* or defined in standard (POSIX or platform or compiler) headers.
*
* As a temporary workaround, you can add an explicit \#define for some macros
* before it is first tested, or add an equivalent -D macro definition
* to the compiler's command line.
*
* Note: Some compilers provide ways to show the predefined macros.
* For example, with gcc you can compile an empty .c file and have the compiler
* print the predefined macros with
* \code
* gcc -E -dM -x c /dev/null | sort
* \endcode
* (You can provide an actual empty .c file rather than /dev/null.
* <code>-x c++</code> is for C++.)
*/
/**
* Define some things so that they can be documented.
* @internal
*/
#ifdef U_IN_DOXYGEN
/*
* Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
* Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
*/
/* None for now. */
#endif
/**
* \def U_PLATFORM
* The U_PLATFORM macro defines the platform we're on.
*
* We used to define one different, value-less macro per platform.
* That made it hard to know the set of relevant platforms and macros,
* and hard to deal with variants of platforms.
*
* Starting with ICU 49, we define platforms as numeric macros,
* with ranges of values for related platforms and their variants.
* The U_PLATFORM macro is set to one of these values.
*
* Historical note from the Solaris Wikipedia article:
* AT&T and Sun collaborated on a project to merge the most popular Unix variants
* on the market at that time: BSD, System V, and Xenix.
* This became Unix System V Release 4 (SVR4).
*
* @internal
*/
/** Unknown platform. @internal */
#define U_PF_UNKNOWN 0
/** Windows @internal */
#define U_PF_WINDOWS 1000
/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
#define U_PF_MINGW 1800
/**
* Cygwin. Windows, calls to cygwin1.dll for Posix functions,
* using MSVC or GNU gcc and binutils.
* @internal
*/
#define U_PF_CYGWIN 1900
/* Reserve 2000 for U_PF_UNIX? */
/** HP-UX is based on UNIX System V. @internal */
#define U_PF_HPUX 2100
/** Solaris is a Unix operating system based on SVR4. @internal */
#define U_PF_SOLARIS 2600
/** BSD is a UNIX operating system derivative. @internal */
#define U_PF_BSD 3000
/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
#define U_PF_AIX 3100
/** IRIX is based on UNIX System V with BSD extensions. @internal */
#define U_PF_IRIX 3200
/**
* Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
* as well as code derived from NeXTSTEP, BSD, and other projects,
* built around the Mach kernel.
* Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
* (Original description modified from WikiPedia.)
* @internal
*/
#define U_PF_DARWIN 3500
/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
#define U_PF_IPHONE 3550
/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
#define U_PF_QNX 3700
/** Linux is a Unix-like operating system. @internal */
#define U_PF_LINUX 4000
/**
* Native Client is pretty close to Linux.
* See https://developer.chrome.com/native-client and
* http://www.chromium.org/nativeclient
* @internal
*/
#define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android is based on Linux. @internal */
#define U_PF_ANDROID 4050
/** Fuchsia is a POSIX-ish platform. @internal */
#define U_PF_FUCHSIA 4100
/* Maximum value for Linux-based platform is 4499 */
/**
* Emscripten is a C++ transpiler for the Web that can target asm.js or
* WebAssembly. It provides some POSIX-compatible wrappers and stubs and
* some Linux-like functionality, but is not fully compatible with
* either.
* @internal
*/
#define U_PF_EMSCRIPTEN 5010
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
#define U_PF_OS390 9000
/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
#define U_PF_OS400 9400
#ifdef U_PLATFORM
/* Use the predefined value. */
#elif defined(__MINGW32__)
# define U_PLATFORM U_PF_MINGW
#elif defined(__CYGWIN__)
# define U_PLATFORM U_PF_CYGWIN
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
# define U_PLATFORM U_PF_WINDOWS
#elif defined(__ANDROID__)
# define U_PLATFORM U_PF_ANDROID
/* Android wchar_t support depends on the API level. */
# include <android/api-level.h>
#elif defined(__pnacl__) || defined(__native_client__)
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
#elif defined(__Fuchsia__)
# define U_PLATFORM U_PF_FUCHSIA
#elif defined(linux) || defined(__linux__) || defined(__linux)
# define U_PLATFORM U_PF_LINUX
#elif defined(__APPLE__) && defined(__MACH__)
# include <TargetConditionals.h>
# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */
# define U_PLATFORM U_PF_IPHONE
# else
# define U_PLATFORM U_PF_DARWIN
# endif
#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
# if defined(__FreeBSD__)
# include <sys/endian.h>
# endif
# define U_PLATFORM U_PF_BSD
#elif defined(sun) || defined(__sun)
/* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
# define U_PLATFORM U_PF_SOLARIS
# if defined(__GNUC__)
/* Solaris/GCC needs this header file to get the proper endianness. Normally, this
* header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h
* is included which does not include this header file.
*/
# include <sys/isa_defs.h>
# endif
#elif defined(_AIX) || defined(__TOS_AIX__)
# define U_PLATFORM U_PF_AIX
#elif defined(_hpux) || defined(hpux) || defined(__hpux)
# define U_PLATFORM U_PF_HPUX
#elif defined(sgi) || defined(__sgi)
# define U_PLATFORM U_PF_IRIX
#elif defined(__QNX__) || defined(__QNXNTO__)
# define U_PLATFORM U_PF_QNX
#elif defined(__TOS_MVS__)
# define U_PLATFORM U_PF_OS390
#elif defined(__OS400__) || defined(__TOS_OS400__)
# define U_PLATFORM U_PF_OS400
#elif defined(__EMSCRIPTEN__)
# define U_PLATFORM U_PF_EMSCRIPTEN
#else
# define U_PLATFORM U_PF_UNKNOWN
#endif
/**
* \def CYGWINMSVC
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
* Otherwise undefined.
* @internal
*/
/* Commented out because this is already set in mh-cygwin-msvc
#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
# define CYGWINMSVC
#endif
*/
#if defined(U_IN_DOXYGEN)
    /* Defined only so that the macro shows up in the generated documentation. */
# define CYGWINMSVC
#endif
/**
 * \def U_PLATFORM_USES_ONLY_WIN32_API
 * 1 if the platform relies exclusively on the Win32 API, 0 otherwise.
 * This is the case for Windows/MSVC and MinGW, but not for Cygwin
 * (unless CYGWINMSVC is defined).
 * A value predefined by the build is left untouched.
 * @internal
 */
#ifndef U_PLATFORM_USES_ONLY_WIN32_API
# if defined(CYGWINMSVC) || (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW)
#  define U_PLATFORM_USES_ONLY_WIN32_API 1
# else
    /* Cygwin implements POSIX. */
#  define U_PLATFORM_USES_ONLY_WIN32_API 0
# endif
#endif
/**
 * \def U_PLATFORM_HAS_WIN32_API
 * 1 if the Win32 API is available on the platform, 0 otherwise.
 * This is the case for Windows/MSVC, MinGW and Cygwin.
 * A value predefined by the build is left untouched.
 * @internal
 */
#ifndef U_PLATFORM_HAS_WIN32_API
# if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
#  define U_PLATFORM_HAS_WIN32_API 1
# else
#  define U_PLATFORM_HAS_WIN32_API 0
# endif
#endif
/**
 * \def U_PLATFORM_HAS_WINUWP_API
 * Whether the target is intended for the Universal Windows Platform API.
 * Set to 1 for Windows10 Release Solution Configuration.
 * Never detected here: defaults to 0 unless predefined by the build.
 * @internal
 */
#ifndef U_PLATFORM_HAS_WINUWP_API
# define U_PLATFORM_HAS_WINUWP_API 0
#endif
/**
 * \def U_PLATFORM_IMPLEMENTS_POSIX
 * 1 if the platform implements (most of) the POSIX API, 0 otherwise.
 * This is the logical opposite of U_PLATFORM_USES_ONLY_WIN32_API,
 * so it is 1 for Cygwin and most other platforms.
 * A value predefined by the build is left untouched.
 * @internal
 */
#ifndef U_PLATFORM_IMPLEMENTS_POSIX
# if !U_PLATFORM_USES_ONLY_WIN32_API
#  define U_PLATFORM_IMPLEMENTS_POSIX 1
# else
#  define U_PLATFORM_IMPLEMENTS_POSIX 0
# endif
#endif
/**
 * \def U_PLATFORM_IS_LINUX_BASED
 * 1 if the platform is Linux or one of its derivatives
 * (identifiers U_PF_LINUX through 4499), 0 otherwise.
 * A value predefined by the build is left untouched.
 * @internal
 */
#ifndef U_PLATFORM_IS_LINUX_BASED
# if U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499
#  define U_PLATFORM_IS_LINUX_BASED 1
# else
#  define U_PLATFORM_IS_LINUX_BASED 0
# endif
#endif
/**
 * \def U_PLATFORM_IS_DARWIN_BASED
 * 1 if the platform is Darwin or one of its derivatives
 * (identifiers U_PF_DARWIN through U_PF_IPHONE), 0 otherwise.
 * A value predefined by the build is left untouched.
 * @internal
 */
#ifndef U_PLATFORM_IS_DARWIN_BASED
# if U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
#  define U_PLATFORM_IS_DARWIN_BASED 1
# else
#  define U_PLATFORM_IS_DARWIN_BASED 0
# endif
#endif
/**
* \def U_HAVE_STDINT_H
* Defines whether stdint.h is available. It is a C99 standard header.
* We used to include inttypes.h which includes stdint.h but we usually do not need
* the additional definitions from inttypes.h.
* @internal
*/
#ifndef U_HAVE_STDINT_H
# if U_PLATFORM_USES_ONLY_WIN32_API
    /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
#  if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
#   define U_HAVE_STDINT_H 1
#  else
#   define U_HAVE_STDINT_H 0
#  endif
# elif U_PLATFORM == U_PF_SOLARIS || \
       (U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER))
    /* Solaris and PPC AIX <= 4.3 have inttypes.h but not stdint.h. */
#  define U_HAVE_STDINT_H 0
# else
#  define U_HAVE_STDINT_H 1
# endif
#endif
/**
 * \def U_HAVE_INTTYPES_H
 * Defines whether inttypes.h is available. It is a C99 standard header.
 * inttypes.h is included where it is available but stdint.h is not.
 * A value predefined by the build is left untouched.
 * @internal
 */
#ifndef U_HAVE_INTTYPES_H
# if U_PLATFORM == U_PF_SOLARIS || \
     (U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER))
    /* Solaris and PPC AIX <= 4.3 have inttypes.h but not stdint.h. */
#  define U_HAVE_INTTYPES_H 1
# else
    /* Most platforms have both inttypes.h and stdint.h, or neither. */
#  define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
# endif
#endif
/*===========================================================================*/
/** @{ Compiler and environment features */
/*===========================================================================*/
/**
* \def U_GCC_MAJOR_MINOR
* Indicates whether the compiler is gcc (test for != 0),
* and if so, contains its major (times 100) and minor version numbers.
* If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
*
* For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
* use "#if U_GCC_MAJOR_MINOR >= 406".
* @internal
*/
#ifndef __GNUC__
    /* Not gcc (or a compiler that identifies itself as gcc). */
# define U_GCC_MAJOR_MINOR 0
#else
    /* Major version times 100 plus minor version, e.g. 4.6 becomes 406. */
# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
#endif
/**
* \def U_IS_BIG_ENDIAN
* Determines the endianness of the platform.
* @internal
*/
#ifndef U_IS_BIG_ENDIAN
    /* Not predefined by the build: derive it, preferring explicit byte-order macros. */
# if defined(BYTE_ORDER) && defined(BIG_ENDIAN)
#  define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
    /* gcc */
#  define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
# elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
#  define U_IS_BIG_ENDIAN 1
# elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
#  define U_IS_BIG_ENDIAN 0
# elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__) || \
       defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) || \
       defined(sparc) || defined(__sparc) || defined(__sparc__)
    /*
     * z/OS, IBM i, s390, HPPA and some sparc based systems (e.g. Linux)
     * do not appear to predefine any endianness macros.
     */
#  define U_IS_BIG_ENDIAN 1
# else
    /* Nothing else matched: treat the platform as little-endian. */
#  define U_IS_BIG_ENDIAN 0
# endif
#endif
/**
* \def U_HAVE_PLACEMENT_NEW
* Determines whether to override placement new and delete for STL.
* @stable ICU 2.6
*/
#ifndef U_HAVE_PLACEMENT_NEW
    /* Not predefined by the build: enabled everywhere except Borland C++. */
# ifdef __BORLANDC__
#  define U_HAVE_PLACEMENT_NEW 0
# else
#  define U_HAVE_PLACEMENT_NEW 1
# endif
#endif
/**
 * \def U_HAVE_DEBUG_LOCATION_NEW
 * Define this to define the MFC debug version of the operator new.
 * Unless predefined by the build, it is 1 when _MSC_VER is defined and 0 otherwise.
 *
 * @stable ICU 3.4
 */
#ifndef U_HAVE_DEBUG_LOCATION_NEW
# ifdef _MSC_VER
#  define U_HAVE_DEBUG_LOCATION_NEW 1
# else
#  define U_HAVE_DEBUG_LOCATION_NEW 0
# endif
#endif
/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */
/*
 * Each UPRV_HAS_*(arg) forwards to the matching __has_*() test when the
 * compiler provides it and evaluates to 0 otherwise.
 */
#ifndef __has_attribute
# define UPRV_HAS_ATTRIBUTE(arg) 0
#else
# define UPRV_HAS_ATTRIBUTE(arg) __has_attribute(arg)
#endif
#ifndef __has_cpp_attribute
# define UPRV_HAS_CPP_ATTRIBUTE(arg) 0
#else
# define UPRV_HAS_CPP_ATTRIBUTE(arg) __has_cpp_attribute(arg)
#endif
#ifndef __has_declspec_attribute
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(arg) 0
#else
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(arg) __has_declspec_attribute(arg)
#endif
#ifndef __has_builtin
# define UPRV_HAS_BUILTIN(arg) 0
#else
# define UPRV_HAS_BUILTIN(arg) __has_builtin(arg)
#endif
#ifndef __has_feature
# define UPRV_HAS_FEATURE(arg) 0
#else
# define UPRV_HAS_FEATURE(arg) __has_feature(arg)
#endif
#ifndef __has_extension
# define UPRV_HAS_EXTENSION(arg) 0
#else
# define UPRV_HAS_EXTENSION(arg) __has_extension(arg)
#endif
#ifndef __has_warning
# define UPRV_HAS_WARNING(arg) 0
#else
# define UPRV_HAS_WARNING(arg) __has_warning(arg)
#endif
/**
* \def U_MALLOC_ATTR
* Attribute to mark functions as malloc-like
* @internal
*/
/* Expands to nothing unless __GNUC__ is defined and at least 3. */
#if !defined(__GNUC__) || __GNUC__ < 3
# define U_MALLOC_ATTR
#else
# define U_MALLOC_ATTR __attribute__ ((__malloc__))
#endif
/**
* \def U_ALLOC_SIZE_ATTR
* Attribute to specify the size of the allocated buffer for malloc-like functions
* @internal
*/
/* Enabled when the compiler reports the alloc_size attribute, or when __GNUC__ indicates 4.3 or later. */
#if UPRV_HAS_ATTRIBUTE(alloc_size) || \
    (defined(__GNUC__) && \
     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
#else
/* Unsupported: both forms expand to nothing. */
# define U_ALLOC_SIZE_ATTR(X)
# define U_ALLOC_SIZE_ATTR2(X,Y)
#endif
/**
* \def U_CPLUSPLUS_VERSION
* 0 if no C++; 1, 11, 14, ... if C++.
* Support for specific features cannot always be determined by the C++ version alone.
* @internal
*/
#if defined(U_CPLUSPLUS_VERSION)
/* A predefined value is kept, except that a non-zero value is reset to 0 when not compiling C++. */
# if !defined(__cplusplus) && U_CPLUSPLUS_VERSION != 0
#  undef U_CPLUSPLUS_VERSION
#  define U_CPLUSPLUS_VERSION 0
# endif
#elif defined(__cplusplus)
/* Derive the level from __cplusplus, or from _MSVC_LANG when that is defined. */
# if __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
#  define U_CPLUSPLUS_VERSION 14
# elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#  define U_CPLUSPLUS_VERSION 11
# else
/* C++98 or C++03 */
#  define U_CPLUSPLUS_VERSION 1
# endif
#else
/* Plain C. */
# define U_CPLUSPLUS_VERSION 0
#endif
#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// Add std::nullptr_t for AIX and OS/390 builds whose detected C++ level is below 11.
// No ';' follows the closing brace: an empty declaration at namespace scope is
// only valid from C++11 on, and this branch is compiled for older levels.
namespace std {
typedef decltype(nullptr) nullptr_t;
}
#endif
/**
* \def U_NOEXCEPT
* "noexcept" if supported, otherwise empty.
* Some code, especially STL containers, uses move semantics of objects only
* if the move constructor and the move operator are declared as not throwing exceptions.
* @internal
*/
/* A predefined U_NOEXCEPT is kept; otherwise it is the noexcept keyword. */
#if !defined(U_NOEXCEPT)
# define U_NOEXCEPT noexcept
#endif
/**
* \def U_FALLTHROUGH
* Annotate intentional fall-through between switch labels.
* http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
* @internal
*/
#if defined(__cplusplus) && !defined(U_FALLTHROUGH)
/* C++ only (not for C), and only when no predefined value was supplied. */
# if defined(__clang__)
// The compiler is tested separately from the feature:
// other compilers might choke on the feature test itself.
#  if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \
      (UPRV_HAS_FEATURE(cxx_attributes) && \
       UPRV_HAS_WARNING("-Wimplicit-fallthrough"))
#   define U_FALLTHROUGH [[clang::fallthrough]]
#  endif
# elif defined(__GNUC__) && (__GNUC__ >= 7)
#  define U_FALLTHROUGH __attribute__((fallthrough))
# endif
#endif
/* Nothing selected above (or compiling C): the annotation expands to nothing. */
#if !defined(U_FALLTHROUGH)
# define U_FALLTHROUGH
#endif
/** @} */
/*===========================================================================*/
/** @{ Character data types */
/*===========================================================================*/
/**
* U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
* @stable ICU 2.0
*/
#define U_ASCII_FAMILY 0
/**
* U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
* @stable ICU 2.0
*/
#define U_EBCDIC_FAMILY 1
/**
* \def U_CHARSET_FAMILY
*
* <p>These definitions allow you to specify the encoding of text
* in the char data type as defined by the platform and the compiler.
* It is enough to determine the code point values of "invariant characters",
* which are the ones shared by all encodings that are in use
* on a given platform.</p>
*
* <p>Those "invariant characters" should be all the uppercase and lowercase
* latin letters, the digits, the space, and "basic punctuation".
* Also, '\\n', '\\r', '\\t' should be available.</p>
*
* <p>The list of "invariant characters" is:<br>
* \code
* A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
* \endcode
* <br>
* (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
*
* <p>This matches the IBM Syntactic Character Set (CS 640).</p>
*
* <p>In other words, all the graphic characters in 7-bit ASCII should
* be safely accessible except the following:</p>
*
* \code
* '\' <backslash>
* '[' <left bracket>
* ']' <right bracket>
* '{' <left brace>
* '}' <right brace>
* '^' <circumflex>
* '~' <tilde>
* '!' <exclamation mark>
* '#' <number sign>
* '|' <vertical line>
* '$' <dollar sign>
* '@' <commercial at>
* '`' <grave accent>
* \endcode
* @stable ICU 2.0
*/
#ifndef U_CHARSET_FAMILY
/*
 * Not predefined. EBCDIC is chosen on OS/390 unless __CHARSET_LIB is defined
 * non-zero, and on OS/400 unless __UTF32__ is defined; everything else is ASCII.
 */
# if (U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)) || \
     (U_PLATFORM == U_PF_OS400 && !defined(__UTF32__))
#  define U_CHARSET_FAMILY U_EBCDIC_FAMILY
# else
#  define U_CHARSET_FAMILY U_ASCII_FAMILY
# endif
#endif
/**
* \def U_CHARSET_IS_UTF8
*
* Hardcode the default charset to UTF-8.
*
* If this is set to 1, then
* - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
* contain UTF-8 text, regardless of what the system API uses
* - some ICU code will use fast functions like u_strFromUTF8()
* rather than the more general and more heavy-weight conversion API (ucnv.h)
* - ucnv_getDefaultName() always returns "UTF-8"
* - ucnv_setDefaultName() is disabled and will not change the default charset
* - static builds of ICU are smaller
* - more functionality is available with the UCONFIG_NO_CONVERSION build-time
* configuration option (see unicode/uconfig.h)
* - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
*
* @stable ICU 4.2
* @see UCONFIG_NO_CONVERSION
*/
#ifndef U_CHARSET_IS_UTF8
/* Not predefined: 1 on Linux-based, Darwin-based and Emscripten platforms, 0 elsewhere. */
# if !(U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \
       U_PLATFORM == U_PF_EMSCRIPTEN)
#  define U_CHARSET_IS_UTF8 0
# else
#  define U_CHARSET_IS_UTF8 1
# endif
#endif
/** @} */
/*===========================================================================*/
/** @{ Information about wchar support */
/*===========================================================================*/
/**
* \def U_HAVE_WCHAR_H
* Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
*
* @stable ICU 2.0
*/
#ifndef U_HAVE_WCHAR_H
# if U_PLATFORM != U_PF_ANDROID || __ANDROID_API__ >= 9
#  define U_HAVE_WCHAR_H 1
# else
/*
 * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
 * The type and header existed, but the library functions did not work as expected.
 * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
 */
#  define U_HAVE_WCHAR_H 0
# endif
#endif
/**
* \def U_SIZEOF_WCHAR_T
* U_SIZEOF_WCHAR_T==sizeof(wchar_t)
*
* @stable ICU 2.0
*/
#ifdef U_SIZEOF_WCHAR_T
/* Use the predefined value. */
#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
/*
* Android before Gingerbread (API level 9): the size of wchar_t was 1
* (see the note at U_HAVE_WCHAR_H, which tests the same condition).
*/
# define U_SIZEOF_WCHAR_T 1
#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
# define U_SIZEOF_WCHAR_T 2
#elif U_PLATFORM == U_PF_AIX
/*
* AIX 6.1 information, section "Wide character data representation":
* "... the wchar_t datatype is 32-bit in the 64-bit environment and
* 16-bit in the 32-bit environment."
* and
* "All locales use Unicode for their wide character code values (process code),
* except the IBM-eucTW codeset."
*/
# ifdef __64BIT__
# define U_SIZEOF_WCHAR_T 4
# else
# define U_SIZEOF_WCHAR_T 2
# endif
#elif U_PLATFORM == U_PF_OS390
/*
* z/OS V1R11 information center, section "LP64 | ILP32":
* "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
* Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
*/
# ifdef _LP64
# define U_SIZEOF_WCHAR_T 4
# else
# define U_SIZEOF_WCHAR_T 2
# endif
#elif U_PLATFORM == U_PF_OS400
# if defined(__UTF32__)
/*
* LOCALETYPE(*LOCALEUTF) is specified.
* Wide-character strings are in UTF-32,
* narrow-character strings are in UTF-8.
*/
# define U_SIZEOF_WCHAR_T 4
# elif defined(__UCS2__)
/*
* LOCALETYPE(*LOCALEUCS2) is specified.
* Wide-character strings are in UCS-2,
* narrow-character strings are in EBCDIC.
*/
# define U_SIZEOF_WCHAR_T 2
# else
/*
* LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
* Wide-character strings are in 16-bit EBCDIC,
* narrow-character strings are in EBCDIC.
*/
# define U_SIZEOF_WCHAR_T 2
# endif
#else
/* Every platform not matched above defaults to 4. */
# define U_SIZEOF_WCHAR_T 4
#endif
/* Unless predefined, U_HAVE_WCSCPY takes the value of U_HAVE_WCHAR_H. */
#if !defined(U_HAVE_WCSCPY)
# define U_HAVE_WCSCPY U_HAVE_WCHAR_H
#endif
/** @} */
/**
* \def U_HAVE_CHAR16_T
* Defines whether the char16_t type is available for UTF-16
* and u"abc" UTF-16 string literals are supported.
* This is a new standard type and standard string literal syntax in C++0x
* but has been available in some compilers before.
* @internal
*/
#ifndef U_HAVE_CHAR16_T
/*
 * Not predefined. Background:
 * - Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
 *   and does not support u"abc" string literals.
 * - Visual Studio 2015 (_MSC_VER>=1900) and above support both
 *   char16_t and u"abc" string literals.
 * - gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
 *   does not support u"abc" string literals.
 * - C++11 and C11 require support for UTF-16 literals.
 * TODO: Fix for plain C. Doesn't work on Mac.
 */
# if U_CPLUSPLUS_VERSION < 11 && !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
#  define U_HAVE_CHAR16_T 0
# else
#  define U_HAVE_CHAR16_T 1
# endif
#endif
/**
* @{
* \def U_DECLARE_UTF16
* Do not use this macro because it is not defined on all platforms.
* Use the UNICODE_STRING or U_STRING_DECL macros instead.
* @internal
*/
#ifndef U_DECLARE_UTF16
/* Not predefined: prefer a u"" literal, fall back to L"" where the conditions below hold. */
# if U_HAVE_CHAR16_T \
    || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
    || (defined(__HP_aCC) && __HP_aCC >= 035000) \
    || (defined(__HP_cc) && __HP_cc >= 111106) \
    || (defined(U_IN_DOXYGEN))
#  define U_DECLARE_UTF16(literal) u ## literal
# elif U_SIZEOF_WCHAR_T == 2 \
    && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
#  define U_DECLARE_UTF16(literal) L ## literal
# endif
/* In every other case U_DECLARE_UTF16 is left undefined. See unistr.h. */
#endif
/** @} */
/*===========================================================================*/
/** @{ Symbol import-export control */
/*===========================================================================*/
#ifndef U_EXPORT
/* Not predefined: choose the export annotation for this build and compiler. */
# if defined(U_STATIC_IMPLEMENTATION)
#  define U_EXPORT
# elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
                              UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
#  define U_EXPORT __declspec(dllexport)
# elif defined(__GNUC__)
#  define U_EXPORT __attribute__((visibility("default")))
# elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
    || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
#  define U_EXPORT __global
/* Disabled branch kept for reference:
#elif defined(__HP_aCC) || defined(__HP_cc)
# define U_EXPORT __declspec(dllexport)*/
# else
#  define U_EXPORT
# endif
#endif
/* U_CALLCONV is related to U_EXPORT2 */
#ifndef U_EXPORT2
/* Not predefined: __cdecl when _MSC_VER is defined, empty otherwise. */
# ifndef _MSC_VER
#  define U_EXPORT2
# else
#  define U_EXPORT2 __cdecl
# endif
#endif
#ifndef U_IMPORT
# if !(defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
                             UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport)))
#  define U_IMPORT
# else
/* Windows needs to export/import data. */
#  define U_IMPORT __declspec(dllimport)
# endif
#endif
/**
* \def U_CALLCONV
* Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
* in callback function typedefs to make sure that the calling convention
* is compatible.
*
* This is only used for non-ICU-API functions.
* When a function is a public ICU API,
* you must use the U_CAPI and U_EXPORT2 qualifiers.
*
* Please note, you need to use U_CALLCONV after the *.
*
* NO : "static const char U_CALLCONV *func( . . . )"
* YES: "static const char* U_CALLCONV func( . . . )"
*
* @stable ICU 2.0
*/
/* Same as U_EXPORT2 everywhere except C++ on OS/390, which uses __cdecl. */
#if U_PLATFORM != U_PF_OS390 || !defined(__cplusplus)
# define U_CALLCONV U_EXPORT2
#else
# define U_CALLCONV __cdecl
#endif
/**
* \def U_CALLCONV_FPTR
* Similar to U_CALLCONV, but only used on function pointers.
* @internal
*/
/* Empty everywhere except C++ on OS/390, where it is U_CALLCONV. */
#if U_PLATFORM != U_PF_OS390 || !defined(__cplusplus)
# define U_CALLCONV_FPTR
#else
# define U_CALLCONV_FPTR U_CALLCONV
#endif
/** @} */
#endif // _PLATFORM_H

View File

@@ -0,0 +1,130 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : ptypes.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
******************************************************************************
*/
/**
* \file
* \brief C API: Definitions of integer types of various widths
*/
#ifndef _PTYPES_H
#define _PTYPES_H
/**
* \def __STDC_LIMIT_MACROS
* According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
* macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
* We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
* that uses such limit macros.
* @internal
*/
/* Request the limit macros unless the includer already did. */
#if !defined(__STDC_LIMIT_MACROS)
# define __STDC_LIMIT_MACROS
#endif
/* NULL, size_t, wchar_t */
#include <stddef.h>
/*
* If all compilers provided all of the C99 headers and types,
* we would just unconditionally #include <stdint.h> here
* and not need any of the stuff after including platform.h.
*/
/* Find out if we have stdint.h etc. */
#include "unicode/platform.h"
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <stdint.h> header, you may
need to edit the typedefs in the #else section below.
Use #if...#else...#endif with predefined compiler macros if possible. */
#if U_HAVE_STDINT_H
/*
 * <stdint.h> defines the standard integer types and is what we mostly need.
 * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
 * which we almost never use, plus stuff like imaxabs() which we never use.
 */
# include <stdint.h>
# if U_PLATFORM == U_PF_OS390
/* The features header is needed to get (u)int64_t sometimes. */
#  include <features.h>
/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
#  ifndef __uint8_t
#   define __uint8_t 1
typedef unsigned char uint8_t;
#  endif
# endif /* U_PLATFORM == U_PF_OS390 */
#elif U_HAVE_INTTYPES_H
# include <inttypes.h>
#else
/* Neither header is available: declare each type whose U_HAVE_* flag is 0. */
/// \cond
# if !U_HAVE_INT8_T
typedef signed char int8_t;
# endif
# if !U_HAVE_UINT8_T
typedef unsigned char uint8_t;
# endif
# if !U_HAVE_INT16_T
typedef signed short int16_t;
# endif
# if !U_HAVE_UINT16_T
typedef unsigned short uint16_t;
# endif
# if !U_HAVE_INT32_T
typedef signed int int32_t;
# endif
# if !U_HAVE_UINT32_T
typedef unsigned int uint32_t;
# endif
/* The 64-bit types are spelled __int64 when _MSC_VER is defined, long long otherwise. */
# ifdef _MSC_VER
#  if !U_HAVE_INT64_T
typedef signed __int64 int64_t;
#  endif
#  if !U_HAVE_UINT64_T
typedef unsigned __int64 uint64_t;
#  endif
# else
#  if !U_HAVE_INT64_T
typedef signed long long int64_t;
#  endif
#  if !U_HAVE_UINT64_T
typedef unsigned long long uint64_t;
#  endif
# endif
/// \endcond
#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
#endif /* _PTYPES_H */

View File

@@ -0,0 +1,183 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1997-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : putil.h
*
* Date Name Description
* 05/14/98 nos Creation (content moved here from utypes.h).
* 06/17/99 erm Added IEEE_754
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
* 08/24/98 stephen Added longBitsFromDouble
* 03/02/99 stephen Removed openFile(). Added AS400 support.
* 04/15/99 stephen Converted to C
* 11/15/99 helena Integrated S/390 changes for IEEE support.
* 01/11/00 helena Added u_getVersion.
******************************************************************************
*/
#ifndef PUTIL_H
#define PUTIL_H
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Platform Utilities
*/
/*==========================================================================*/
/* Platform utilities */
/*==========================================================================*/
/**
* Platform utilities isolates the platform dependencies of the
* library. For each platform which this code is ported to, these
* functions may have to be re-implemented.
*/
/**
* Return the ICU data directory.
* The data directory is where common format ICU data files (.dat files)
* are loaded from. Note that normal use of the built-in ICU
* facilities does not require loading of an external data file;
* unless you are adding custom data to ICU, the data directory
* does not need to be set.
*
* The data directory is determined as follows:
* If u_setDataDirectory() has been called, that is it, otherwise
* if the ICU_DATA environment variable is set, use that, otherwise
* If a data directory was specified at ICU build time
* <code>
* \code
* #define ICU_DATA_DIR "path"
* \endcode
* </code> use that,
* otherwise no data directory is available.
*
* @return the data directory, or an empty string ("") if no data directory has
* been specified.
*
* @stable ICU 2.0
*/
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
/**
* Set the ICU data directory.
* The data directory is where common format ICU data files (.dat files)
* are loaded from. Note that normal use of the built-in ICU
* facilities does not require loading of an external data file;
* unless you are adding custom data to ICU, the data directory
* does not need to be set.
*
* This function should be called at most once in a process, before the
* first ICU operation (e.g., u_init()) that will require the loading of an
* ICU data file.
* This function is not thread-safe. Use it before calling ICU APIs from
* multiple threads.
*
* @param directory The directory to be set.
*
* @see u_init
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
#ifndef U_HIDE_INTERNAL_API
/**
* Return the time zone files override directory, or an empty string if
* no directory was specified. Certain time zone resources will be preferentially
* loaded from individual files in this directory.
*
* @return the time zone data override directory.
* @internal
*/
U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
/**
* Set the time zone files override directory.
* This function is not thread safe; it must not be called concurrently with
* u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
* This function should only be called before using any ICU service that
* will access the time zone data.
* @internal
*/
U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
#endif /* U_HIDE_INTERNAL_API */
/**
* @{
* Filesystem file and path separator characters.
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
* @stable ICU 2.0
*/
#if !U_PLATFORM_USES_ONLY_WIN32_API
/* Default: '/' separates files (the alternate is the same), ':' separates path entries. */
# define U_FILE_SEP_CHAR '/'
# define U_FILE_SEP_STRING "/"
# define U_FILE_ALT_SEP_CHAR '/'
# define U_FILE_ALT_SEP_STRING "/"
# define U_PATH_SEP_CHAR ':'
# define U_PATH_SEP_STRING ":"
#else
/* Win32-only platforms: backslash with '/' as the alternate, ';' separates path entries. */
# define U_FILE_SEP_CHAR '\\'
# define U_FILE_SEP_STRING "\\"
# define U_FILE_ALT_SEP_CHAR '/'
# define U_FILE_ALT_SEP_STRING "/"
# define U_PATH_SEP_CHAR ';'
# define U_PATH_SEP_STRING ";"
#endif
/** @} */
/**
* Convert char characters to UChar characters.
* This utility function is useful only for "invariant characters"
* that are encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see U_CHARSET_FAMILY.
*
* @param cs Input string, points to <code>length</code>
* character bytes from a subset of the platform encoding.
* @param us Output string, points to memory for <code>length</code>
* Unicode characters.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*
* @see U_CHARSET_FAMILY
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, int32_t length);
/**
* Convert UChar characters to char characters.
* This utility function is useful only for "invariant characters"
* that can be encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see U_CHARSET_FAMILY.
*
* @param us Input string, points to <code>length</code>
* Unicode characters that can be encoded with the
* codepage-invariant subset of the platform encoding.
* @param cs Output string, points to memory for <code>length</code>
* character bytes.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*
* @see U_CHARSET_FAMILY
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, int32_t length);
#endif

View File

@@ -0,0 +1,266 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**************************************************************************
* Copyright (C) 1999-2012, International Business Machines Corporation and
* others. All Rights Reserved.
**************************************************************************
* Date Name Description
* 11/17/99 aliu Creation. Ported from java. Modified to
* match current UnicodeString API. Forced
* to use name "handleReplaceBetween" because
* of existing methods in UnicodeString.
**************************************************************************
*/
#ifndef REP_H
#define REP_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
/**
* \file
* \brief C++ API: Replaceable String
*/
U_NAMESPACE_BEGIN
class UnicodeString;
/**
* <code>Replaceable</code> is an abstract base class representing a
* string of characters that supports the replacement of a range of
* itself with a new string of characters. It is used by APIs that
* change a piece of text while retaining metadata. Metadata is data
* other than the Unicode characters returned by char32At(). One
* example of metadata is style attributes; another is an edit
* history, marking each character with an author and revision number.
*
* <p>An implicit aspect of the <code>Replaceable</code> API is that
* during a replace operation, new characters take on the metadata of
* the old characters. For example, if the string "the <b>bold</b>
* font" has range (4, 8) replaced with "strong", then it becomes "the
* <b>strong</b> font".
*
* <p><code>Replaceable</code> specifies ranges using a start
* offset and a limit offset. The range of characters thus specified
* includes the characters at offset start..limit-1. That is, the
* start offset is inclusive, and the limit offset is exclusive.
*
* <p><code>Replaceable</code> also includes API to access characters
* in the string: <code>length()</code>, <code>charAt()</code>,
* <code>char32At()</code>, and <code>extractBetween()</code>.
*
* <p>For a subclass to support metadata, typical behavior of
* <code>replace()</code> is the following:
* <ul>
* <li>Set the metadata of the new text to the metadata of the first
* character replaced</li>
* <li>If no characters are replaced, use the metadata of the
* previous character</li>
* <li>If there is no previous character (i.e. start == 0), use the
* following character</li>
* <li>If there is no following character (i.e. the replaceable was
* empty), use default metadata.</li>
* <li>If the code point U+FFFF is seen, it should be interpreted as
* a special marker having no metadata.</li>
* </ul>
* If this is not the behavior, the subclass should document any differences.
* @author Alan Liu
* @stable ICU 2.0
*/
class U_COMMON_API Replaceable : public UObject {
public:
/**
* Destructor.
* @stable ICU 2.0
*/
virtual ~Replaceable();
/**
* Returns the number of 16-bit code units in the text.
* Non-virtual; forwards to the protected virtual getLength().
* @return number of 16-bit code units in text
* @stable ICU 1.8
*/
inline int32_t length() const;
/**
* Returns the 16-bit code unit at the given offset into the text.
* Non-virtual; forwards to the protected virtual getCharAt().
* @param offset an integer between 0 and <code>length()</code>-1
* inclusive
* @return 16-bit code unit of text at given offset
* @stable ICU 1.8
*/
inline char16_t charAt(int32_t offset) const;
/**
* Returns the 32-bit code point at the given 16-bit offset into
* the text. This assumes the text is stored as 16-bit code units
* with surrogate pairs intermixed. If the offset of a leading or
* trailing code unit of a surrogate pair is given, return the
* code point of the surrogate pair.
* Non-virtual; forwards to the protected virtual getChar32At().
*
* @param offset an integer between 0 and <code>length()</code>-1
* inclusive
* @return 32-bit code point of text at given offset
* @stable ICU 1.8
*/
inline UChar32 char32At(int32_t offset) const;
/**
* Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
* into the UnicodeString <tt>target</tt>.
* This function returns nothing; the result is delivered through
* <tt>target</tt>.
* @param start offset of first character which will be copied
* @param limit offset immediately following the last character to
* be copied
* @param target UnicodeString into which to copy characters.
* @stable ICU 2.1
*/
virtual void extractBetween(int32_t start,
int32_t limit,
UnicodeString& target) const = 0;
/**
* Replaces a substring of this object with the given text. If the
* characters being replaced have metadata, the new characters
* that replace them should be given the same metadata.
*
* <p>Subclasses must ensure that if the text between start and
* limit is equal to the replacement text, that replace has no
* effect. That is, any metadata
* should be unaffected. In addition, subclasses are encouraged to
* check for initial and trailing identical characters, and make a
* smaller replacement if possible. This will preserve as much
* metadata as possible.
* @param start the beginning index, inclusive; <code>0 <= start
* <= limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit
* <= length()</code>.
* @param text the text to replace characters <code>start</code>
* to <code>limit - 1</code>
* @stable ICU 2.0
*/
virtual void handleReplaceBetween(int32_t start,
int32_t limit,
const UnicodeString& text) = 0;
// Note: All other methods in this class take the names of
// existing UnicodeString methods. This method is the exception.
// It is named differently because all replace methods of
// UnicodeString return a UnicodeString&. The 'between' is
// required in order to conform to the UnicodeString naming
// convention; API taking start/length are named <operation>, and
// those taking start/limit are named <operationBetween>. The
// 'handle' is added because 'replaceBetween' and
// 'doReplaceBetween' are already taken.
/**
* Copies a substring of this object, retaining metadata.
* This method is used to duplicate or reorder substrings.
* The destination index must not overlap the source range.
*
* @param start the beginning index, inclusive; <code>0 <= start <=
* limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit <=
* length()</code>.
* @param dest the destination index. The characters from
* <code>start..limit-1</code> will be copied to <code>dest</code>.
* Implementations of this method may assume that <code>dest <= start ||
* dest >= limit</code>.
* @stable ICU 2.0
*/
virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
/**
* Returns true if this object contains metadata. If a
* Replaceable object has metadata, calls to the Replaceable API
* must be made so as to preserve metadata. If it does not, calls
* to the Replaceable API may be optimized to improve performance.
* The default implementation returns true.
* @return true if this object contains metadata
* @stable ICU 2.2
*/
virtual UBool hasMetaData() const;
/**
* Clone this object, an instance of a subclass of Replaceable.
* Clones can be used concurrently in multiple threads.
* If a subclass does not implement clone(), or if an error occurs,
* then NULL is returned.
* The caller must delete the clone.
*
* @return a clone of this object
*
* @see getDynamicClassID
* @stable ICU 2.6
*/
virtual Replaceable *clone() const;
protected:
/**
* Default constructor.
* @stable ICU 2.4
*/
inline Replaceable();
/*
* Assignment operator not declared. The compiler will provide one
* which does nothing since this class does not contain any data members.
* API/code coverage may show the assignment operator as present and
* untested - ignore.
* Subclasses need this assignment operator if they use compiler-provided
* assignment operators of their own. An alternative to not declaring one
* here would be to declare and empty-implement a protected or public one.
Replaceable &Replaceable::operator=(const Replaceable &);
*/
/**
* Virtual version of length().
* @stable ICU 2.4
*/
virtual int32_t getLength() const = 0;
/**
* Virtual version of charAt().
* @stable ICU 2.4
*/
virtual char16_t getCharAt(int32_t offset) const = 0;
/**
* Virtual version of char32At().
* @stable ICU 2.4
*/
virtual UChar32 getChar32At(int32_t offset) const = 0;
};
inline Replaceable::Replaceable() {}
inline int32_t
Replaceable::length() const {
return getLength();
}
inline char16_t
Replaceable::charAt(int32_t offset) const {
return getCharAt(offset);
}
inline UChar32
Replaceable::char32At(int32_t offset) const {
return getChar32At(offset);
}
// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif

View File

@@ -0,0 +1,41 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2009-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: std_string.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009feb19
* created by: Markus W. Scherer
*/
#ifndef __STD_STRING_H__
#define __STD_STRING_H__
/**
* \file
* \brief C++ API: Central ICU header for including the C++ standard &lt;string&gt;
* header and for related definitions.
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
// Workaround for a libstdc++ bug before libstdc++4.6 (2011).
// https://bugs.llvm.org/show_bug.cgi?id=13364
#ifdef __GLIBCXX__
// libstdc++ only: forward-declare std::type_info ahead of the <string> include.
namespace std {
class type_info;
}
#endif
#include <string>
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __STD_STRING_H__

View File

@@ -0,0 +1,281 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*/
#ifndef STRENUM_H
#define STRENUM_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
#include "unicode/unistr.h"
/**
* \file
* \brief C++ API: String Enumeration
*/
U_NAMESPACE_BEGIN
/**
* Base class for 'pure' C++ implementations of uenum api. Adds a
* method that returns the next UnicodeString since in C++ this can
* be a common storage format for strings.
*
* <p>The model is that the enumeration is over strings maintained by
* a 'service.' At any point, the service might change, invalidating
* the enumerator (though this is expected to be rare). The iterator
* returns an error if this has occurred. Lack of the error is no
* guarantee that the service didn't change immediately after the
* call, so the returned string still might not be 'valid' on
* subsequent use.</p>
*
* <p>Strings may take the form of const char*, const char16_t*, or const
* UnicodeString*. The type you get is determined by the variant of
* 'next' that you call. In general the StringEnumeration is
* optimized for one of these types, but all StringEnumerations can
* return all types. Returned strings are each terminated with a NUL.
* Depending on the service data, they might also include embedded NUL
* characters, so API is provided to optionally return the true
* length, counting the embedded NULs but not counting the terminating
* NUL.</p>
*
* <p>The pointers returned by next, unext, and snext become invalid
* upon any subsequent call to the enumeration's destructor, next,
* unext, snext, or reset.</p>
*
* ICU 2.8 adds some default implementations and helper functions
* for subclasses.
*
* @stable ICU 2.4
*/
class U_COMMON_API StringEnumeration : public UObject {
public:
/**
* Destructor.
* @stable ICU 2.4
*/
virtual ~StringEnumeration();
/**
* Clone this object, an instance of a subclass of StringEnumeration.
* Clones can be used concurrently in multiple threads.
* If a subclass does not implement clone(), or if an error occurs,
* then NULL is returned.
* The caller must delete the clone.
*
* @return a clone of this object
*
* @see getDynamicClassID
* @stable ICU 2.8
*/
virtual StringEnumeration *clone() const;
/**
* <p>Return the number of elements that the iterator traverses. If
* the iterator is out of sync with its service, status is set to
* U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
*
* <p>The return value will not change except possibly as a result of
* a subsequent call to reset, or if the iterator becomes out of sync.</p>
*
* <p>This is a convenience function. It can end up being very
* expensive as all the items might have to be pre-fetched
* (depending on the storage format of the data being
* traversed).</p>
*
* @param status the error code.
* @return number of elements in the iterator.
*
* @stable ICU 2.4 */
virtual int32_t count(UErrorCode& status) const = 0;
/**
* <p>Returns the next element as a NUL-terminated char*. If there
* are no more elements, returns NULL. If the resultLength pointer
* is not NULL, the length of the string (not counting the
* terminating NUL) is returned at that address. If an error
* status is returned, the value at resultLength is undefined.</p>
*
* <p>The returned pointer is owned by this iterator and must not be
* deleted by the caller. The pointer is valid until the next call
* to next, unext, snext, reset, or the enumerator's destructor.</p>
*
* <p>If the iterator is out of sync with its service, status is set
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
*
* <p>If the native service string is a char16_t* string, it is
* converted to char* with the invariant converter. If the
* conversion fails (because a character cannot be converted) then
* status is set to U_INVARIANT_CONVERSION_ERROR and the return
* value is undefined (though not NULL).</p>
*
* Starting with ICU 2.8, the default implementation calls snext()
* and handles the conversion.
* Either next() or snext() must be implemented differently by a subclass.
*
* @param resultLength a pointer to receive the length, can be NULL.
* @param status the error code.
* @return a pointer to the string, or NULL.
*
* @stable ICU 2.4
*/
virtual const char* next(int32_t *resultLength, UErrorCode& status);
/**
* <p>Returns the next element as a NUL-terminated char16_t*. If there
* are no more elements, returns NULL. If the resultLength pointer
* is not NULL, the length of the string (not counting the
* terminating NUL) is returned at that address. If an error
* status is returned, the value at resultLength is undefined.</p>
*
* <p>The returned pointer is owned by this iterator and must not be
* deleted by the caller. The pointer is valid until the next call
* to next, unext, snext, reset, or the enumerator's destructor.</p>
*
* <p>If the iterator is out of sync with its service, status is set
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
*
* Starting with ICU 2.8, the default implementation calls snext()
* and handles the conversion.
*
* @param resultLength a pointer to receive the length, can be NULL.
* @param status the error code.
* @return a pointer to the string, or NULL.
*
* @stable ICU 2.4
*/
virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
/**
* <p>Returns the next element as a UnicodeString*. If there are no
* more elements, returns NULL.</p>
*
* <p>The returned pointer is owned by this iterator and must not be
* deleted by the caller. The pointer is valid until the next call
* to next, unext, snext, reset, or the enumerator's destructor.</p>
*
* <p>If the iterator is out of sync with its service, status is set
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
*
* Starting with ICU 2.8, the default implementation calls next()
* and handles the conversion.
* Either next() or snext() must be implemented differently by a subclass.
*
* @param status the error code.
* @return a pointer to the string, or NULL.
*
* @stable ICU 2.4
*/
virtual const UnicodeString* snext(UErrorCode& status);
/**
* <p>Resets the iterator. This re-establishes sync with the
* service and rewinds the iterator to start at the first
* element.</p>
*
* <p>Previous pointers returned by next, unext, or snext become
* invalid, and the value returned by count might change.</p>
*
* @param status the error code.
*
* @stable ICU 2.4
*/
virtual void reset(UErrorCode& status) = 0;
/**
* Compares this enumeration to other to check if both are equal
*
* @param that The other string enumeration to compare this object to
* @return true if the enumerations are equal. false if not.
* @stable ICU 3.6
*/
virtual UBool operator==(const StringEnumeration& that)const;
/**
* Compares this enumeration to other to check if both are not equal
*
* @param that The other string enumeration to compare this object to
* @return true if the enumerations are not equal. false if they are equal.
* @stable ICU 3.6
*/
virtual UBool operator!=(const StringEnumeration& that)const;
protected:
/**
* UnicodeString field for use with default implementations and subclasses.
* @stable ICU 2.8
*/
UnicodeString unistr;
/**
* char * default buffer for use with default implementations and subclasses.
* @stable ICU 2.8
*/
char charsBuffer[32];
/**
* char * buffer for use with default implementations and subclasses.
* Allocated in constructor and in ensureCharsCapacity().
* @stable ICU 2.8
*/
char *chars;
/**
* Capacity of chars, for use with default implementations and subclasses.
* @stable ICU 2.8
*/
int32_t charsCapacity;
/**
* Default constructor for use with default implementations and subclasses.
* @stable ICU 2.8
*/
StringEnumeration();
/**
* Ensures that chars is at least as large as the requested capacity.
* For use with default implementations and subclasses.
*
* @param capacity Requested capacity.
* @param status ICU in/out error code.
* @stable ICU 2.8
*/
void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
/**
* Converts s to Unicode and sets unistr to the result.
* For use with default implementations and subclasses,
* especially for implementations of snext() in terms of next().
* This is provided with a helper function instead of a default implementation
* of snext() to avoid potential infinite loops between next() and snext().
*
* For example:
* \code
* const UnicodeString* snext(UErrorCode& status) {
* int32_t resultLength=0;
* const char *s=next(&resultLength, status);
* return setChars(s, resultLength, status);
* }
* \endcode
*
* @param s String to be converted to Unicode.
* @param length Length of the string.
* @param status ICU in/out error code.
* @return A pointer to unistr.
* @stable ICU 2.8
*/
UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
};
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
/* STRENUM_H */
#endif

View File

@@ -0,0 +1,190 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// stringoptions.h
// created: 2017jun08 Markus W. Scherer
#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Bit set option bit constants for various string and character processing functions.
*/
/**
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
/**
* Titlecase the string as a whole rather than each word.
* (Titlecase only the character at index 0, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @stable ICU 60
*/
#define U_TITLECASE_WHOLE_STRING 0x20
/**
* Titlecase sentences rather than words.
* (Titlecase only the first character of each sentence, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @stable ICU 60
*/
#define U_TITLECASE_SENTENCES 0x40
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the character at each
* (possibly adjusted) BreakIterator index and
* lowercase all other characters up to the next iterator index.
* With this option, the other characters will not be modified.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing BreakIterator indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
* and titlecase that one.
*
* Other characters are lowercased.
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see U_TITLECASE_NO_LOWERCASE
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
/**
* Adjust each titlecasing BreakIterator index to the next cased character.
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
* Option bit for titlecasing APIs that take an options bit set.
*
* This used to be the default index adjustment in ICU.
* Since ICU 60, the default index adjustment is to the next character that is
* a letter, number, symbol, or private use code point.
* (Uncased modifier letters are skipped.)
* The difference in behavior is small for word titlecasing,
* but the new adjustment is much better for whole-string and sentence titlecasing:
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
* @stable ICU 60
*/
#define U_TITLECASE_ADJUST_TO_CASED 0x400
/**
* Option for string transformation functions to not first reset the Edits object.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @stable ICU 60
*/
#define U_EDITS_NO_RESET 0x2000
/**
* Omit unchanged text when recording how source substrings
* relate to changed and unchanged result substrings.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @stable ICU 60
*/
#define U_OMIT_UNCHANGED_TEXT 0x4000
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
#endif // __STRINGOPTIONS_H__

View File

@@ -0,0 +1,343 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Copyright (C) 2009-2013, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2001 and onwards Google Inc.
// Author: Sanjay Ghemawat
// This code is a contribution of Google code, and the style used here is
// a compromise between the original Google code and the ICU coding guidelines.
// For example, data types are ICU-ified (size_t,int->int32_t),
// and API comments doxygen-ified, but function names and behavior are
// as in the original, if possible.
// Assertion-style error handling, not available in ICU, was changed to
// parameter "pinning" similar to UnicodeString.
//
// In addition, this is only a partial port of the original Google code,
// limited to what was needed so far. The (nearly) complete original code
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
// (see ICU ticket 6765, r25517).
#ifndef __STRINGPIECE_H__
#define __STRINGPIECE_H__
/**
* \file
* \brief C++ API: StringPiece: Read-only byte string wrapper class.
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
#include <type_traits>
#include "unicode/uobject.h"
#include "unicode/std_string.h"
// Arghh! I wish C++ literals were "string".
U_NAMESPACE_BEGIN
/**
* A string-like object that points to a sized piece of memory.
*
* We provide non-explicit singleton constructors so users can pass
* in a "const char*" or a "string" wherever a "StringPiece" is
* expected.
*
* Functions or methods may use StringPiece parameters to accept either a
* "const char*" or a "string" value that will be implicitly converted to a
* StringPiece.
*
* Systematic usage of StringPiece is encouraged as it will reduce unnecessary
* conversions from "const char*" to "string" and back again.
*
* @stable ICU 4.2
*/
class U_COMMON_API StringPiece : public UMemory {
private:
// Start of the referenced bytes; nullptr for a default-constructed or cleared piece.
const char* ptr_;
// Number of bytes referenced, starting at ptr_.
int32_t length_;
public:
/**
* Default constructor, creates an empty StringPiece.
* @stable ICU 4.2
*/
StringPiece() : ptr_(nullptr), length_(0) { }
/**
* Constructs from a NUL-terminated const char * pointer.
* @param str a NUL-terminated const char * pointer
* @stable ICU 4.2
*/
StringPiece(const char* str);
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a NUL-terminated const char8_t * pointer.
* Delegates to the const char * constructor after a reinterpret_cast.
* @param str a NUL-terminated const char8_t * pointer
* @stable ICU 67
*/
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
#endif
/**
* Constructs an empty StringPiece.
* Needed for type disambiguation from multiple other overloads.
* @param p nullptr
* @stable ICU 67
*/
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
/**
* Constructs from a std::string.
* The StringPiece refers to str.data() and does not copy the bytes.
* NOTE(review): str.size() is narrowed to int32_t with a static_cast;
* confirm callers never pass strings longer than INT32_MAX bytes.
* @param str the std::string to refer to
* @stable ICU 4.2
*/
StringPiece(const std::string& str)
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a std::u8string.
* The StringPiece refers to str.data() and does not copy the bytes.
* @param str the std::u8string to refer to
* @stable ICU 67
*/
StringPiece(const std::u8string& str)
: ptr_(reinterpret_cast<const char*>(str.data())),
length_(static_cast<int32_t>(str.size())) { }
#endif
/**
* Constructs from some other implementation of a string piece class, from any
* C++ record type that has these two methods:
*
* \code{.cpp}
*
* struct OtherStringPieceClass {
* const char* data(); // or const char8_t*
* size_t size();
* };
*
* \endcode
*
* The other string piece class will typically be std::string_view from C++17
* or absl::string_view from Abseil.
*
* Starting with C++20, data() may also return a const char8_t* pointer,
* as from std::u8string_view.
*
* This constructor is enabled (via std::enable_if) only when
* decltype(T().data()) is exactly const char* (or const char8_t* when
* __cpp_char8_t is defined) and decltype(T().size()) is exactly size_t.
*
* @param str the other string piece
* @stable ICU 65
*/
template <typename T,
typename = typename std::enable_if<
(std::is_same<decltype(T().data()), const char*>::value
#if defined(__cpp_char8_t)
|| std::is_same<decltype(T().data()), const char8_t*>::value
#endif
) &&
std::is_same<decltype(T().size()), size_t>::value>::type>
StringPiece(T str)
: ptr_(reinterpret_cast<const char*>(str.data())),
length_(static_cast<int32_t>(str.size())) {}
/**
* Constructs from a const char * pointer and a specified length.
* @param offset a const char * pointer (need not be terminated)
* @param len the length of the string; must be non-negative
* @stable ICU 4.2
*/
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a const char8_t * pointer and a specified length.
* @param str a const char8_t * pointer (need not be terminated)
* @param len the length of the string; must be non-negative
* @stable ICU 67
*/
StringPiece(const char8_t* str, int32_t len) :
StringPiece(reinterpret_cast<const char*>(str), len) {}
#endif
/**
* Substring of another StringPiece.
* @param x the other StringPiece
* @param pos start position in x; must be non-negative and <= x.length().
* @stable ICU 4.2
*/
StringPiece(const StringPiece& x, int32_t pos);
/**
* Substring of another StringPiece.
* @param x the other StringPiece
* @param pos start position in x; must be non-negative and <= x.length().
* @param len length of the substring;
* must be non-negative and will be pinned to at most x.length() - pos.
* @stable ICU 4.2
*/
StringPiece(const StringPiece& x, int32_t pos, int32_t len);
/**
* Returns the string pointer. May be nullptr if it is empty.
*
* data() may return a pointer to a buffer with embedded NULs, and the
* returned buffer may or may not be null terminated. Therefore it is
* typically a mistake to pass data() to a routine that expects a NUL
* terminated string.
* @return the string pointer
* @stable ICU 4.2
*/
const char* data() const { return ptr_; }
/**
* Returns the string length. Same as length().
* @return the string length
* @stable ICU 4.2
*/
int32_t size() const { return length_; }
/**
* Returns the string length. Same as size().
* @return the string length
* @stable ICU 4.2
*/
int32_t length() const { return length_; }
/**
* Returns whether the string is empty.
* @return true if the string is empty
* @stable ICU 4.2
*/
UBool empty() const { return length_ == 0; }
/**
* Sets to an empty string.
* Afterwards data() returns nullptr and length() returns 0.
* @stable ICU 4.2
*/
void clear() { ptr_ = nullptr; length_ = 0; }
/**
* Reset the stringpiece to refer to new data.
* @param xdata pointer to the new string data. Need not be NUL-terminated.
* @param len the length of the new data
* @stable ICU 4.8
*/
void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
/**
* Reset the stringpiece to refer to new data.
* @param str a pointer to a NUL-terminated string.
* @stable ICU 4.8
*/
void set(const char* str);
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Resets the stringpiece to refer to new data.
* @param xdata pointer to the new string data. Need not be NUL-terminated.
* @param len the length of the new data
* @stable ICU 67
*/
inline void set(const char8_t* xdata, int32_t len) {
set(reinterpret_cast<const char*>(xdata), len);
}
/**
* Resets the stringpiece to refer to new data.
* @param str a pointer to a NUL-terminated string.
* @stable ICU 67
*/
inline void set(const char8_t* str) {
set(reinterpret_cast<const char*>(str));
}
#endif
/**
* Removes the first n string units.
* A negative n is ignored; an n greater than length() is pinned to
* length(), which leaves the piece empty.
* @param n prefix length, must be non-negative and <=length()
* @stable ICU 4.2
*/
void remove_prefix(int32_t n) {
if (n >= 0) {
if (n > length_) {
n = length_;
}
ptr_ += n;
length_ -= n;
}
}
/**
* Removes the last n string units.
* A negative n is ignored; an n greater than length() sets the length
* to 0. The string pointer is not changed.
* @param n suffix length, must be non-negative and <=length()
* @stable ICU 4.2
*/
void remove_suffix(int32_t n) {
if (n >= 0) {
if (n <= length_) {
length_ -= n;
} else {
length_ = 0;
}
}
}
/**
* Searches the StringPiece for the given search string (needle).
* @param needle The string for which to search.
* @param offset Where to start searching within this string (haystack).
* @return The offset of needle in haystack, or -1 if not found.
* @stable ICU 67
*/
int32_t find(StringPiece needle, int32_t offset);
/**
* Compares this StringPiece with the other StringPiece, with semantics
* similar to std::string::compare().
* @param other The string to compare to.
* @return below zero if this < other; above zero if this > other; 0 if this == other.
* @stable ICU 67
*/
int32_t compare(StringPiece other);
/**
* Maximum integer, used as a default value for substring methods.
* @stable ICU 4.2
*/
static const int32_t npos; // = 0x7fffffff;
/**
* Returns a substring of this StringPiece.
* Implemented by calling the (x, pos, len) substring constructor on *this.
* @param pos start position; must be non-negative and <= length().
* @param len length of the substring;
* must be non-negative and will be pinned to at most length() - pos.
* @return the substring StringPiece
* @stable ICU 4.2
*/
StringPiece substr(int32_t pos, int32_t len = npos) const {
return StringPiece(*this, pos, len);
}
};
/**
* Global operator == for StringPiece
* @param x The first StringPiece to compare.
* @param y The second StringPiece to compare.
* @return true if the string data is equal
* @stable ICU 4.8
*/
U_EXPORT UBool U_EXPORT2
operator==(const StringPiece& x, const StringPiece& y);
/**
* Global operator != for StringPiece
* @param x The first StringPiece to compare.
* @param y The second StringPiece to compare.
* @return true if the string data is not equal
* @stable ICU 4.8
*/
inline UBool operator!=(const StringPiece& x, const StringPiece& y)
{
    // Defined as the negation of operator== for the same operands.
    const UBool same = (x == y);
    return !same;
}
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __STRINGPIECE_H__

View File

@@ -0,0 +1,650 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 1996-2015, International Business Machines Corporation and others.
* All Rights Reserved.
******************************************************************************
*/
#ifndef UBRK_H
#define UBRK_H
#include "unicode/utypes.h"
#include "unicode/uloc.h"
#include "unicode/utext.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"
#endif // U_SHOW_CPLUSPLUS_API
/**
* A text-break iterator.
* For usage in C programs.
*/
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
/**
* Opaque type representing an ICU Break iterator object.
* @stable ICU 2.0
*/
typedef struct UBreakIterator UBreakIterator;
#endif
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/parseerr.h"
/**
* \file
* \brief C API: BreakIterator
*
* <h2> BreakIterator C API </h2>
*
* The BreakIterator C API defines methods for finding the location
* of boundaries in text. A UBreakIterator maintains a
* current position and scans over text, returning the index of characters
* where boundaries occur.
* <p>
* Line boundary analysis determines where a text string can be broken
* when line-wrapping. The mechanism correctly handles punctuation and
* hyphenated words.
* <p>
* Note: The locale keyword "lb" can be used to modify line break
* behavior according to the CSS level 3 line-break options, see
* <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
* "ja@lb=strict", "zh@lb=loose".
* <p>
* Sentence boundary analysis allows selection with correct
* interpretation of periods within numbers and abbreviations, and
* trailing punctuation marks such as quotation marks and parentheses.
* <p>
* Note: The locale keyword "ss" can be used to enable use of
* segmentation suppression data (preventing breaks in English after
* abbreviations such as "Mr." or "Est.", for example), as follows:
* "en@ss=standard".
* <p>
* Word boundary analysis is used by search and replace functions, as
* well as within text editing applications that allow the user to
* select words with a double click. Word selection provides correct
* interpretation of punctuation marks within and following
* words. Characters that are not part of a word, such as symbols or
* punctuation marks, have word-breaks on both sides.
* <p>
* Character boundary analysis identifies the boundaries of
* "Extended Grapheme Clusters", which are groupings of codepoints
* that should be treated as character-like units for many text operations.
* Please see Unicode Standard Annex #29, Unicode Text Segmentation,
* http://www.unicode.org/reports/tr29/ for additional information
* on grapheme clusters and guidelines on their use.
* <p>
* Title boundary analysis locates all positions,
* typically starts of words, that should be set to Title Case
* when title casing the text.
* <p>
* The text boundary positions are found according to the rules
* described in Unicode Standard Annex #29, Text Boundaries, and
* Unicode Standard Annex #14, Line Breaking Properties. These
* are available at http://www.unicode.org/reports/tr14/ and
* http://www.unicode.org/reports/tr29/.
* <p>
* In addition to the plain C API defined in this header file, an
* object oriented C++ API with equivalent functionality is defined in the
* file brkiter.h.
* <p>
* Code snippets illustrating the use of the Break Iterator APIs
* are available in the ICU User Guide,
* http://icu-project.org/userguide/boundaryAnalysis.html
* and in the sample program icu/source/samples/break/break.cpp
*/
/** The possible types of text boundaries. @stable ICU 2.0 */
typedef enum UBreakIteratorType {
/** Character breaks @stable ICU 2.0 */
UBRK_CHARACTER = 0,
/** Word breaks @stable ICU 2.0 */
UBRK_WORD = 1,
/** Line breaks @stable ICU 2.0 */
UBRK_LINE = 2,
/** Sentence breaks @stable ICU 2.0 */
UBRK_SENTENCE = 3,
/* The two enumerators below are compiled out when U_HIDE_DEPRECATED_API is defined. */
#ifndef U_HIDE_DEPRECATED_API
/**
* Title Case breaks
* The iterator created using this type locates title boundaries as described for
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
* please use Word Boundary iterator.
*
* @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
*/
UBRK_TITLE = 4,
/**
* One more than the highest normal UBreakIteratorType value.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UBRK_COUNT = 5
#endif // U_HIDE_DEPRECATED_API
} UBreakIteratorType;
/** Value indicating all text boundaries have been returned.
* @stable ICU 2.0
*/
#define UBRK_DONE ((int32_t) -1)
/**
* Enum constants for the word break tags returned by
* getRuleStatus(). A range of values is defined for each category of
* word, to allow for further subdivisions of a category in future releases.
* Applications should check for tag values falling within the range, rather
* than for single individual values.
*
* The numeric values of all of these constants are stable (will not change).
*
* @stable ICU 2.2
*/
typedef enum UWordBreak {
/* Each *_LIMIT value equals the lower bound of the next category, so the
 * ranges appear to be half-open: a tag t belongs to category X when
 * X <= t < X_LIMIT. NOTE(review): confirm the upper bound is exclusive. */
/** Tag value for "words" that do not fit into any of other categories.
* Includes spaces and most punctuation. */
UBRK_WORD_NONE = 0,
/** Upper bound for tags for uncategorized words. */
UBRK_WORD_NONE_LIMIT = 100,
/** Tag value for words that appear to be numbers, lower limit. */
UBRK_WORD_NUMBER = 100,
/** Tag value for words that appear to be numbers, upper limit. */
UBRK_WORD_NUMBER_LIMIT = 200,
/** Tag value for words that contain letters, excluding
* hiragana, katakana or ideographic characters, lower limit. */
UBRK_WORD_LETTER = 200,
/** Tag value for words containing letters, upper limit */
UBRK_WORD_LETTER_LIMIT = 300,
/** Tag value for words containing kana characters, lower limit */
UBRK_WORD_KANA = 300,
/** Tag value for words containing kana characters, upper limit */
UBRK_WORD_KANA_LIMIT = 400,
/** Tag value for words containing ideographic characters, lower limit */
UBRK_WORD_IDEO = 400,
/** Tag value for words containing ideographic characters, upper limit */
UBRK_WORD_IDEO_LIMIT = 500
} UWordBreak;
/**
* Enum constants for the line break tags returned by getRuleStatus().
* A range of values is defined for each category of
* line break, to allow for further subdivisions of a category in future releases.
* Applications should check for tag values falling within the range, rather
* than for single individual values.
*
* The numeric values of all of these constants are stable (will not change).
*
* @stable ICU 2.8
*/
typedef enum ULineBreakTag {
/* UBRK_LINE_SOFT_LIMIT equals UBRK_LINE_HARD, so the ranges appear to be
 * half-open: soft is [0, 100), hard is [100, 200).
 * NOTE(review): confirm the upper bound is exclusive. */
/** Tag value for soft line breaks, positions at which a line break
* is acceptable but not required */
UBRK_LINE_SOFT = 0,
/** Upper bound for soft line breaks. */
UBRK_LINE_SOFT_LIMIT = 100,
/** Tag value for a hard, or mandatory line break */
UBRK_LINE_HARD = 100,
/** Upper bound for hard line breaks. */
UBRK_LINE_HARD_LIMIT = 200
} ULineBreakTag;
/**
* Enum constants for the sentence break tags returned by getRuleStatus().
* A range of values is defined for each category of
* sentence, to allow for further subdivisions of a category in future releases.
* Applications should check for tag values falling within the range, rather
* than for single individual values.
*
* The numeric values of all of these constants are stable (will not change).
*
* @stable ICU 2.8
*/
typedef enum USentenceBreakTag {
/** Tag value for sentences ending with a sentence terminator
* ('.', '?', '!', etc.) character, possibly followed by a
* hard separator (CR, LF, PS, etc.)
*/
UBRK_SENTENCE_TERM = 0,
/** Upper bound for tags for sentences ended by sentence terminators. */
UBRK_SENTENCE_TERM_LIMIT = 100,
/** Tag value for sentences that do not contain an ending
* sentence terminator ('.', '?', '!', etc.) character, but
* are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
*/
UBRK_SENTENCE_SEP = 100,
/** Upper bound for tags for sentences ended by a separator. */
UBRK_SENTENCE_SEP_LIMIT = 200
} USentenceBreakTag;
/**
* Open a new UBreakIterator for locating text boundaries for a specified locale.
* A UBreakIterator may be used for detecting character, line, word,
* and sentence breaks in text.
* @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
* UBRK_LINE, UBRK_SENTENCE
* @param locale The locale specifying the text-breaking conventions. Note that
* locale keys such as "lb" and "ss" may be used to modify text break behavior,
* see general discussion of BreakIterator C API.
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
* used to specify the text to be iterated.
* @param textLength The number of characters in text, or -1 if null-terminated.
* @param status A UErrorCode to receive any errors.
* @return A UBreakIterator for the specified locale.
* @see ubrk_openRules
* @stable ICU 2.0
*/
U_CAPI UBreakIterator* U_EXPORT2
ubrk_open(UBreakIteratorType type,
const char *locale,
const UChar *text,
int32_t textLength,
UErrorCode *status);
/**
* Open a new UBreakIterator for locating text boundaries using specified breaking rules.
* The rule syntax is ... (TBD)
* @param rules A set of rules specifying the text breaking conventions.
* @param rulesLength The number of characters in rules, or -1 if null-terminated.
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
* used to specify the text to be iterated.
* @param textLength The number of characters in text, or -1 if null-terminated.
* @param parseErr Receives position and context information for any syntax errors
* detected while parsing the rules.
* @param status A UErrorCode to receive any errors.
* @return A UBreakIterator for the specified rules.
* @see ubrk_open
* @stable ICU 2.2
*/
U_CAPI UBreakIterator* U_EXPORT2
ubrk_openRules(const UChar *rules,
int32_t rulesLength,
const UChar *text,
int32_t textLength,
UParseError *parseErr,
UErrorCode *status);
/**
* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
* Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
* compatible across different major versions of ICU, nor across platforms of different
* endianness or different base character set family (ASCII vs EBCDIC).
* @param binaryRules A set of compiled binary rules specifying the text breaking
* conventions. Ownership of the storage containing the compiled
* rules remains with the caller of this function. The compiled
* rules must not be modified or deleted during the life of the
* break iterator.
* @param rulesLength The length of binaryRules in bytes; must be >= 0.
* @param text The text to be iterated over. May be null, in which case
* ubrk_setText() is used to specify the text to be iterated.
* @param textLength The number of characters in text, or -1 if null-terminated.
* @param status Pointer to UErrorCode to receive any errors.
* @return UBreakIterator for the specified rules.
* @see ubrk_getBinaryRules
* @stable ICU 59
*/
U_CAPI UBreakIterator* U_EXPORT2
ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
const UChar * text, int32_t textLength,
UErrorCode * status);
#ifndef U_HIDE_DEPRECATED_API
/**
* Thread safe cloning operation
* @param bi iterator to be cloned
* @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
* user allocated space for the new clone. If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
* Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
* @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
* pointer to size of allocated space.
* If *pBufferSize == 0, a sufficient size for use in cloning will
* be returned ('pre-flighting')
* If *pBufferSize is not enough for a stack-based safe clone,
* new memory will be allocated.
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
* @return pointer to the new clone
* @deprecated ICU 69 Use ubrk_clone() instead.
*/
U_CAPI UBreakIterator * U_EXPORT2
ubrk_safeClone(
const UBreakIterator *bi,
void *stackBuffer,
int32_t *pBufferSize,
UErrorCode *status);
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**
* Thread safe cloning operation.
* @param bi iterator to be cloned
* @param status to indicate whether the operation went on smoothly or there were errors
* @return pointer to the new clone
* @draft ICU 69
*/
U_CAPI UBreakIterator * U_EXPORT2
ubrk_clone(const UBreakIterator *bi,
UErrorCode *status);
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
* @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
*/
#define U_BRK_SAFECLONE_BUFFERSIZE 1
#endif /* U_HIDE_DEPRECATED_API */
/**
* Close a UBreakIterator.
* Once closed, a UBreakIterator may no longer be used.
* @param bi The break iterator to close.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
ubrk_close(UBreakIterator *bi);
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
* \class LocalUBreakIteratorPointer
* "Smart pointer" class, closes a UBreakIterator via ubrk_close().
* For most methods see the LocalPointerBase base class.
*
* @see LocalPointerBase
* @see LocalPointer
* @stable ICU 4.4
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
U_NAMESPACE_END
#endif
/**
* Sets an existing iterator to point to a new piece of text.
* The break iterator retains a pointer to the supplied text.
* The caller must not modify or delete the text while the BreakIterator
* retains the reference.
*
* @param bi The iterator to use
* @param text The text to be set
* @param textLength The length of the text
* @param status The error code
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
ubrk_setText(UBreakIterator* bi,
const UChar* text,
int32_t textLength,
UErrorCode* status);
/**
* Sets an existing iterator to point to a new piece of text.
*
* All index positions returned by break iterator functions are
* native indices from the UText. For example, when breaking UTF-8
* encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
* will be UTF-8 string indices, not UTF-16 positions.
*
* @param bi The iterator to use
* @param text The text to be set.
* This function makes a shallow clone of the supplied UText. This means
* that the caller is free to immediately close or otherwise reuse the
* UText that was passed as a parameter, but that the underlying text itself
* must not be altered while being referenced by the break iterator.
* @param status The error code
* @stable ICU 3.4
*/
U_CAPI void U_EXPORT2
ubrk_setUText(UBreakIterator* bi,
UText* text,
UErrorCode* status);
/**
* Determine the most recently-returned text boundary.
*
* @param bi The break iterator to use.
* @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
* \ref ubrk_first, or \ref ubrk_last.
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_current(const UBreakIterator *bi);
/**
* Advance the iterator to the boundary following the current boundary.
*
* @param bi The break iterator to use.
* @return The character index of the next text boundary, or UBRK_DONE
* if all text boundaries have been returned.
* @see ubrk_previous
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_next(UBreakIterator *bi);
/**
* Set the iterator position to the boundary preceding the current boundary.
*
* @param bi The break iterator to use.
* @return The character index of the preceding text boundary, or UBRK_DONE
* if all text boundaries have been returned.
* @see ubrk_next
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_previous(UBreakIterator *bi);
/**
* Set the iterator position to zero, the start of the text being scanned.
* @param bi The break iterator to use.
* @return The new iterator position (zero).
* @see ubrk_last
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_first(UBreakIterator *bi);
/**
* Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
* This is not the same as the last character.
* @param bi The break iterator to use.
* @return The character offset immediately <EM>beyond</EM> the last character in the
* text being scanned.
* @see ubrk_first
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_last(UBreakIterator *bi);
/**
* Set the iterator position to the first boundary preceding the specified offset.
* The new position is always smaller than offset, or UBRK_DONE.
* @param bi The break iterator to use.
* @param offset The offset to begin scanning.
* @return The text boundary preceding offset, or UBRK_DONE.
* @see ubrk_following
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_preceding(UBreakIterator *bi,
int32_t offset);
/**
* Advance the iterator to the first boundary following the specified offset.
* The value returned is always greater than offset, or UBRK_DONE.
* @param bi The break iterator to use.
* @param offset The offset to begin scanning.
* @return The text boundary following offset, or UBRK_DONE.
* @see ubrk_preceding
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_following(UBreakIterator *bi,
int32_t offset);
/**
* Get a locale for which text breaking information is available.
* A UBreakIterator in a locale returned by this function will perform the correct
* text breaking for the locale.
* @param index The index of the desired locale.
* @return A locale for which text breaking information is available, or 0 if none.
* @see ubrk_countAvailable
* @stable ICU 2.0
*/
U_CAPI const char* U_EXPORT2
ubrk_getAvailable(int32_t index);
/**
* Determine how many locales have text breaking information available.
* This function is most useful as determining the loop ending condition for
* calls to \ref ubrk_getAvailable.
* @return The number of locales for which text breaking information is available.
* @see ubrk_getAvailable
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_countAvailable(void);
/**
* Returns true if the specified position is a boundary position. As a side
* effect, leaves the iterator pointing to the first boundary position at
* or after "offset".
* @param bi The break iterator to use.
* @param offset the offset to check.
* @return True if "offset" is a boundary position.
* @stable ICU 2.0
*/
U_CAPI UBool U_EXPORT2
ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
/**
* Return the status from the break rule that determined the most recently
* returned break position. The values appear in the rule source
* within brackets, {123}, for example. For rules that do not specify a
* status, a default value of 0 is returned.
* <p>
* For word break iterators, the possible values are defined in enum UWordBreak.
* For sentence break iterators, see enum USentenceBreakTag.
* @param bi The break iterator to use.
* @return The status value from the break rule that determined the most
* recently returned break position, or 0 if the rule specifies none.
* @see ubrk_getRuleStatusVec
* @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
ubrk_getRuleStatus(UBreakIterator *bi);
/**
* Get the statuses from the break rules that determined the most recently
* returned break position. The values appear in the rule source
* within brackets, {123}, for example. The default status value for rules
* that do not explicitly provide one is zero.
* <p>
* For word break iterators, the possible values are defined in enum UWordBreak.
* @param bi The break iterator to use
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
* @stable ICU 3.0
*/
U_CAPI int32_t U_EXPORT2
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
/**
* Return the locale of the break iterator. You can choose between the valid and
* the actual locale.
* @param bi break iterator
* @param type locale type (valid or actual)
* @param status error code
* @return locale string
* @stable ICU 2.8
*/
U_CAPI const char* U_EXPORT2
ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
/**
* Set the subject text string upon which the break iterator is operating
* without changing any other aspect of the state.
* The new and previous text strings must have the same content.
*
* This function is intended for use in environments where ICU is operating on
* strings that may move around in memory. It provides a mechanism for notifying
* ICU that the string has been relocated, and providing a new UText to access the
* string in its new position.
*
* Note that the break iterator never copies the underlying text
* of a string being processed, but always operates directly on the original text
* provided by the user. Refreshing simply drops the references to the old text
* and replaces them with references to the new.
*
* Caution: this function is normally used only by very specialized
* system-level code. One example use case is with garbage collection
* that moves the text in memory.
*
* @param bi The break iterator.
* @param text The new (moved) text string.
* @param status Receives errors detected by this function.
*
* @stable ICU 49
*/
U_CAPI void U_EXPORT2
ubrk_refreshUText(UBreakIterator *bi,
UText *text,
UErrorCode *status);
/**
* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
* more quickly than using ubrk_openRules. The compiled rules are not compatible across
* different major versions of ICU, nor across platforms of different endianness or
* different base character set family (ASCII vs EBCDIC). Supports preflighting (with
* binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
* the binaryRules buffer. However, whether preflighting or not, if the actual length
* is greater than INT32_MAX, then the function returns 0 and sets *status to
* U_INDEX_OUTOFBOUNDS_ERROR.
* @param bi The break iterator to use.
* @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
* preflighting.
* @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
* preflighting. Must be >= 0.
* @param status Pointer to UErrorCode to receive any errors, such as
* U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
* U_ILLEGAL_ARGUMENT_ERROR.
* @return The actual byte length of the binary rules, if <= INT32_MAX;
* otherwise 0. If not preflighting and this is larger than
* rulesCapacity, *status will be set to an error.
* @see ubrk_openBinaryRules
* @stable ICU 59
*/
U_CAPI int32_t U_EXPORT2
ubrk_getBinaryRules(UBreakIterator *bi,
uint8_t * binaryRules, int32_t rulesCapacity,
UErrorCode * status);
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,465 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
*
* ucnv_err.h:
*/
/**
* \file
* \brief C UConverter predefined error callbacks
*
* <h2>Error Behaviour Functions</h2>
* Defines some error behaviour functions called by ucnv_{from,to}Unicode
* These are provided as part of ICU and many are stable, but they
* can also be considered only as an example of what can be done with
* callbacks. You may of course write your own.
*
* If you want to write your own, you may also find the functions from
* ucnv_cb.h useful when writing your own callbacks.
*
* These functions, although public, should NEVER be called directly.
* They should be used as parameters to the ucnv_setFromUCallBack
* and ucnv_setToUCallBack functions, to set the behaviour of a converter
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
*
* usage example: 'STOP' doesn't need any context, but newContext
* could be set to something other than 'NULL' if needed. The available
* contexts in this header can modify the default behavior of the callback.
*
* \code
* UErrorCode err = U_ZERO_ERROR;
* UConverter *myConverter = ucnv_open("ibm-949", &err);
* const void *oldContext;
* UConverterFromUCallback oldAction;
*
*
* if (U_SUCCESS(err))
* {
* ucnv_setFromUCallBack(myConverter,
* UCNV_FROM_U_CALLBACK_STOP,
* NULL,
* &oldAction,
* &oldContext,
* &err);
* }
* \endcode
*
* The code above tells "myConverter" to stop when it encounters an
* ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
* and ucnv_setToUCallBack would need to be called in order to change
* that behavior too.
*
* Here is an example with a context:
*
* \code
* UErrorCode err = U_ZERO_ERROR;
* UConverter *myConverter = ucnv_open("ibm-949", &err);
* const void *oldContext;
* UConverterToUCallback oldAction;
*
*
* if (U_SUCCESS(err))
* {
* ucnv_setToUCallBack(myConverter,
* UCNV_TO_U_CALLBACK_SUBSTITUTE,
* UCNV_SUB_STOP_ON_ILLEGAL,
* &oldAction,
* &oldContext,
* &err);
* }
* \endcode
*
* The code above tells "myConverter" to stop when it encounters an
* ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
* Codepage -> Unicode. Any unmapped and legal characters will be
* substituted to be the default substitution character.
*/
#ifndef UCNV_ERR_H
#define UCNV_ERR_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
/** Forward declaring the UConverter structure. @stable ICU 2.0 */
struct UConverter;
/** @stable ICU 2.0 */
typedef struct UConverter UConverter;
/**
* FROM_U, TO_U context options for sub callback
* @stable ICU 2.0
*/
#define UCNV_SUB_STOP_ON_ILLEGAL "i"
/**
* FROM_U, TO_U context options for skip callback
* @stable ICU 2.0
*/
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_ICU NULL
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_JAVA "J"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_C "C"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_DEC "D"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_HEX "X"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_UNICODE "U"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
* a backslash, 1..6 hex digits, and a space)
* @stable ICU 4.0
*/
#define UCNV_ESCAPE_CSS2 "S"
/**
* The process condition code to be used with the callbacks.
* Codes which are greater than UCNV_IRREGULAR should be
* passed on to any chained callbacks.
* @stable ICU 2.0
*/
typedef enum {
UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
The error code U_INVALID_CHAR_FOUND will be set. */
UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
\\x81\\x2E is illegal in SJIS because \\x2E
is not a valid trail byte for the \\x81
lead byte.
Also, starting with Unicode 3.0.1, non-shortest byte sequences
in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
are also illegal, not just irregular.
The error code U_ILLEGAL_CHAR_FOUND will be set. */
UCNV_IRREGULAR = 2, /**< The code point is not a regular sequence in
the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
are irregular UTF-8 byte sequences for single surrogate
code points.
The error code U_INVALID_CHAR_FOUND will be set. */
UCNV_RESET = 3, /**< The callback is called with this reason when a
'reset' has occurred. The callback should reset all
state. */
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
callback should release any allocated memory. */
UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
converter. The pointer available as the
'context' is an alias to the original converter's
context pointer. If the context must be owned
by the new converter, the callback must clone
the data and call ucnv_setFromUCallBack
(or ucnv_setToUCallBack) with the correct pointer.
@stable ICU 2.2
*/
} UConverterCallbackReason;
/**
* The structure for the fromUnicode callback function parameter.
* @stable ICU 2.0
*/
typedef struct {
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterFromUnicodeArgs;
/**
* The structure for the toUnicode callback function parameter.
* @stable ICU 2.0
*/
typedef struct {
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterToUnicodeArgs;
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
*
* @param context Pointer to the callback's private data
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Number of UChars of the concerned Unicode sequence that codeUnits points to
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err This should always be set to a failure status prior to calling.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
*
* @param context Pointer to the callback's private data
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err This should always be set to a failure status prior to calling.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
* skips only UNASSIGNED_SEQUENCE depending on the context parameter,
* simply ignoring those characters.
*
* @param context The function currently recognizes the callback options:
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Skips any ILLEGAL_SEQUENCE
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Number of UChars of the concerned Unicode sequence that codeUnits points to
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
* UNASSIGNED_SEQUENCE depending on context parameter, with the
* current substitution string for the converter. This is the default
* callback.
*
* @param context The function currently recognizes the callback options:
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Substitutes any ILLEGAL_SEQUENCE
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Number of UChars of the concerned Unicode sequence that codeUnits points to
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @see ucnv_setSubstChars
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
* hexadecimal representation of the illegal codepoints
*
* @param context The function currently recognizes the callback options:
* <ul>
* <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
* representation in the format %UXXXX, e.g. "%UFFFE%U00AC%UC8FE".
* In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* %UD84D%UDC56</li>
* <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE".
* In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* \\uD84D\\uDC56</li>
* <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
* representation in the format \\uXXXX or \\UXXXXXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE".
* In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* \\U00023456</li>
* <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
* representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;"\endhtmlonly.
* In the Event the converter doesn't support the characters {&amp;,#}[0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* &amp;#144470; and Zero padding is ignored.</li>
* <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
* representation in the format \htmlonly&amp;#xXXXX;, e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;"\endhtmlonly.
* In the Event the converter doesn't support the characters {&,#,x}[0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* \htmlonly&amp;#x23456;\endhtmlonly</li>
* <li>UCNV_ESCAPE_UNICODE: Escapes the code unit according to Unicode,
* in the format U+XXXXX.</li>
* <li>UCNV_ESCAPE_CSS2: Escapes the code unit according to CSS2 conventions,
* that is, a backslash, 1..6 hex digits, and a space.</li>
* </ul>
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Number of UChars of the concerned Unicode sequence that codeUnits points to
* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
* skips only UNASSIGNED_SEQUENCE depending on the context parameter,
* simply ignoring those characters.
*
* @param context The function currently recognizes the callback options:
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Skips any ILLEGAL_SEQUENCE
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
* UNASSIGNED_SEQUENCE depending on context parameter, with the
* Unicode substitution character, U+FFFD.
*
* @param context The function currently recognizes the callback options:
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Substitutes any ILLEGAL_SEQUENCE
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback substitutes the ILLEGAL_SEQUENCE with the
* hexadecimal representation of the illegal bytes
* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
*
* @param context This function currently recognizes the callback options:
* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
* NOTE(review): presumably each option selects a different escape
* notation than the %XNN form shown above - confirm against the
* definitions of the UCNV_ESCAPE_* constants.
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
#endif
#endif
/*UCNV_ERR_H*/

View File

@@ -0,0 +1,456 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uconfig.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002sep19
* created by: Markus W. Scherer
*/
#ifndef __UCONFIG_H__
#define __UCONFIG_H__
/*!
* \file
* \brief User-configurable settings
*
* Miscellaneous switches:
*
* A number of macros affect a variety of minor aspects of ICU.
* Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
* and moved here to make them easier to find.
*
* Switches for excluding parts of ICU library code modules:
*
* Changing these macros allows building partial, smaller libraries for special purposes.
* By default, all modules are built.
* The switches are fairly coarse, controlling large modules.
* Basic services cannot be turned off.
*
* Building with any of these options does not guarantee that the
* ICU build process will completely work. It is recommended that
* the ICU libraries and data be built using the normal build.
* At that time you should remove the data used by those services.
* After building the ICU data library, you should rebuild the ICU
* libraries with these switches customized to your needs.
*
* @stable ICU 2.4
*/
/**
* If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
* prior to determining default settings for uconfig variables.
*
* @internal ICU 4.0
*/
#if defined(UCONFIG_USE_LOCAL)
#include "uconfig_local.h"
#endif
/**
* \def U_DEBUG
* Controls whether debugging code is included.
* A value supplied by the build is kept as-is. Otherwise the value is
* derived from _DEBUG, which is set automatically on Windows; most other
* compilers do not have a related predefined macro.
* @internal
*/
#if !defined(U_DEBUG)
# if defined(_DEBUG)
/*
* Visual Studio defines _DEBUG for debug compilation.
* Its NDEBUG macro is deliberately *not* tested: it is an orthogonal
* macro which disables assert().
*/
# define U_DEBUG 1
# else
# define U_DEBUG 0
# endif
#endif
/**
* \def UCLN_NO_AUTO_CLEANUP
* Determines whether to enable auto cleanup of libraries.
* Note that the macro name is phrased negatively ("NO") and that the
* default set below is 1.
* NOTE(review): presumably a non-zero value switches automatic cleanup
* off - confirm against the code that tests this macro.
* @internal
*/
#ifndef UCLN_NO_AUTO_CLEANUP
#define UCLN_NO_AUTO_CLEANUP 1
#endif
/**
* \def U_DISABLE_RENAMING
* Selects whether renaming is disabled.
* Defaults to 0 unless the build already provides a value.
* @internal
*/
#if !defined(U_DISABLE_RENAMING)
# define U_DISABLE_RENAMING 0
#endif
/**
* \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
* Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
* utypes.h includes those headers if this macro is defined to 0.
* Otherwise, each of those headers must be included explicitly when using
* one of their macros.
* Defaults to 0 for backward compatibility, except inside ICU: when one of
* the U_*_IMPLEMENTATION macros tested below is defined, the default is 1.
* @stable ICU 49
*/
#if !defined(U_NO_DEFAULT_INCLUDE_UTF_HEADERS)
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
defined(U_TOOLUTIL_IMPLEMENTATION)
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
# else
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
# endif
#endif
/**
* \def U_OVERRIDE_CXX_ALLOCATION
* Determines whether operators new and delete are overridden.
* By default (value 1) all ICU C++ classes override operators new and
* delete, via their UMemory base, so that ICU's internal, customizable,
* non-exception-throwing memory allocation functions are used.
*
* This matters most when the application and its libraries use multiple
* heaps. For example, on Windows, it allows the ICU DLL to be used by
* applications that statically link the C Runtime library.
*
* @stable ICU 2.2
*/
#if !defined(U_OVERRIDE_CXX_ALLOCATION)
# define U_OVERRIDE_CXX_ALLOCATION 1
#endif
/**
* \def U_ENABLE_TRACING
* Selects whether tracing is enabled.
* Defaults to 0 unless the build already provides a value.
* @internal
*/
#if !defined(U_ENABLE_TRACING)
# define U_ENABLE_TRACING 0
#endif
/**
* \def UCONFIG_ENABLE_PLUGINS
* Selects whether ICU plugins are enabled.
* Defaults to 0 unless the build already provides a value.
* @internal
*/
#if !defined(UCONFIG_ENABLE_PLUGINS)
# define UCONFIG_ENABLE_PLUGINS 0
#endif
/**
* \def U_ENABLE_DYLOAD
* Selects whether Dynamic loading is enabled in ICU.
* Defaults to 1 unless the build already provides a value.
* @internal
*/
#if !defined(U_ENABLE_DYLOAD)
# define U_ENABLE_DYLOAD 1
#endif
/**
* \def U_CHECK_DYLOAD
* Selects whether Dynamic loading is tested as an OS capability.
* Defaults to 1 unless the build already provides a value.
* @internal
*/
#if !defined(U_CHECK_DYLOAD)
# define U_CHECK_DYLOAD 1
#endif
/**
* \def U_DEFAULT_SHOW_DRAFT
* Selects whether ICU users may use the draft APIs by default.
* Defaults to 1 unless the build already provides a value.
* @internal
*/
#if !defined(U_DEFAULT_SHOW_DRAFT)
# define U_DEFAULT_SHOW_DRAFT 1
#endif
/*===========================================================================*/
/* Custom icu entry point renaming */
/*===========================================================================*/
/**
* \def U_HAVE_LIB_SUFFIX
* 1 if a custom library suffix is set.
* A value predefined by the build is kept. Otherwise the macro is defined
* to 1 when U_LIB_SUFFIX_C_NAME is defined (or when running under Doxygen).
* When none of these apply the macro is left undefined here, so a plain
* "#if U_HAVE_LIB_SUFFIX" evaluates it as 0.
* @internal
*/
#ifdef U_HAVE_LIB_SUFFIX
/* Use the predefined value. */
#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
# define U_HAVE_LIB_SUFFIX 1
#endif
/**
* \def U_LIB_SUFFIX_C_NAME_STRING
* Defines the library suffix as a string with C syntax.
* A value predefined by the build is kept. Otherwise, when
* U_LIB_SUFFIX_C_NAME is defined, its macro-expanded value is turned into
* a string literal; when it is not defined, this is the empty string.
* @internal
*/
#ifdef U_LIB_SUFFIX_C_NAME_STRING
/* Use the predefined value. */
#elif defined(U_LIB_SUFFIX_C_NAME)
/* Stringifies the argument exactly as spelled (no macro expansion). */
# define CONVERT_TO_STRING(s) #s
/*
* The extra macro level makes the preprocessor expand U_LIB_SUFFIX_C_NAME
* before '#' is applied. Stringifying it directly would produce the
* literal text "U_LIB_SUFFIX_C_NAME" instead of the configured suffix.
*/
# define U_LIB_SUFFIX_EXPAND_TO_STRING(s) CONVERT_TO_STRING(s)
# define U_LIB_SUFFIX_C_NAME_STRING U_LIB_SUFFIX_EXPAND_TO_STRING(U_LIB_SUFFIX_C_NAME)
#else
# define U_LIB_SUFFIX_C_NAME_STRING ""
#endif
/* common/i18n library switches --------------------------------------------- */
/**
* \def UCONFIG_ONLY_COLLATION
* Turns off the modules that collation does not need.
*
* Legacy conversion stays on, because ICU needs it on EBCDIC platforms
* (for the default converter).
* For "only collation" on a non-EBCDIC build, UCONFIG_NO_CONVERSION or
* UCONFIG_NO_LEGACY_CONVERSION can be defined to 1 as well.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_ONLY_COLLATION)
# define UCONFIG_ONLY_COLLATION 0
#endif
#if UCONFIG_ONLY_COLLATION
/* i18n library: a collation-only build cannot also switch collation off */
# if UCONFIG_NO_COLLATION
# error Contradictory collation switches in uconfig.h.
# endif
# define UCONFIG_NO_FORMATTING 1
# define UCONFIG_NO_TRANSLITERATION 1
# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
/* common library */
# define UCONFIG_NO_BREAK_ITERATION 1
# define UCONFIG_NO_IDNA 1
#endif
/* common library switches -------------------------------------------------- */
/**
* \def UCONFIG_NO_FILE_IO
* Turns off all file access in the common library where file access is
* only used for data loading.
* ICU data must then be supplied as a data DLL (or in an equivalent way
* of linking to data that resides in an executable, as when building a
* combined library with both the common library's code and the data),
* or via udata_setCommonData().
* Application data must be supplied via udata_setAppData() or through
* "open" functions that take pointers to data, for example
* ucol_openBinary().
*
* The i18n library does not use file access at all.
*
* File access cannot be turned off for the icuio library or for the ICU
* test suites and ICU tools.
*
* Defining U_TIMEZONE_FILES_DIR together with this switch is rejected
* below as contradictory.
*
* @stable ICU 3.6
*/
#if !defined(UCONFIG_NO_FILE_IO)
# define UCONFIG_NO_FILE_IO 0
#endif
#if defined(U_TIMEZONE_FILES_DIR) && UCONFIG_NO_FILE_IO
# error Contradictory file io switches in uconfig.h.
#endif
/**
* \def UCONFIG_NO_CONVERSION
* Turns off all converters.
* With this switch turned on ICU will not completely build (compiling
* the tools fails).
* Turning it on also turns on UCONFIG_NO_LEGACY_CONVERSION.
*
* It may be useful together with U_CHARSET_IS_UTF8 defined to 1
* in utypes.h if char* strings in your environment are always in UTF-8.
*
* @stable ICU 3.2
* @see U_CHARSET_IS_UTF8
*/
#if !defined(UCONFIG_NO_CONVERSION)
# define UCONFIG_NO_CONVERSION 0
#endif
#if UCONFIG_NO_CONVERSION
# define UCONFIG_NO_LEGACY_CONVERSION 1
#endif
/**
* \def UCONFIG_ONLY_HTML_CONVERSION
* Turns off every converter that is NOT listed in the HTML encoding
* standard:
* http://www.w3.org/TR/encoding/#names-and-labels
*
* Not possible on EBCDIC platforms, which need the ibm-37 or ibm-1047
* default converters.
*
* @stable ICU 55
*/
#if !defined(UCONFIG_ONLY_HTML_CONVERSION)
# define UCONFIG_ONLY_HTML_CONVERSION 0
#endif
/**
* \def UCONFIG_NO_LEGACY_CONVERSION
* Turns off all converters except for
* - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
* - US-ASCII
* - ISO-8859-1
*
* Not possible on EBCDIC platforms, which need the ibm-37 or ibm-1047
* default converters.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_LEGACY_CONVERSION)
# define UCONFIG_NO_LEGACY_CONVERSION 0
#endif
/**
* \def UCONFIG_NO_NORMALIZATION
* Turns off normalization.
* Several other services are turned off along with it, for example
* collation and IDNA; combining it with UCONFIG_ONLY_COLLATION is
* therefore rejected below.
*
* @stable ICU 2.6
*/
#if !defined(UCONFIG_NO_NORMALIZATION)
# define UCONFIG_NO_NORMALIZATION 0
#endif
#if UCONFIG_NO_NORMALIZATION
/* i18n library */
# if UCONFIG_ONLY_COLLATION
# error Contradictory collation switches in uconfig.h.
# endif
# define UCONFIG_NO_COLLATION 1
# define UCONFIG_NO_TRANSLITERATION 1
/* common library */
/* The ICU 50 CJK dictionary BreakIterator uses normalization */
# define UCONFIG_NO_BREAK_ITERATION 1
/* IDNA (UTS #46) is implemented via normalization */
# define UCONFIG_NO_IDNA 1
#endif
/**
* \def UCONFIG_NO_BREAK_ITERATION
* Turns off break iteration.
* Defaults to 0 unless a value was already defined.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_BREAK_ITERATION)
# define UCONFIG_NO_BREAK_ITERATION 0
#endif
/**
* \def UCONFIG_NO_IDNA
* Turns off IDNA.
* Defaults to 0 unless a value was already defined.
*
* @stable ICU 2.6
*/
#if !defined(UCONFIG_NO_IDNA)
# define UCONFIG_NO_IDNA 0
#endif
/**
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
* Selects the default UMessagePatternApostropheMode; see the
* documentation for that enum.
* Defaults to UMSGPAT_APOS_DOUBLE_OPTIONAL unless a value was already
* defined.
*
* @stable ICU 4.8
*/
#if !defined(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE)
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
#endif
/**
* \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API
* Selects, on platforms where U_PLATFORM_HAS_WIN32_API is true, whether
* the Windows platform APIs are used for LCID<->Locale Name conversions.
* Otherwise only the built-in ICU tables are used.
* Defaults to 1 unless a value was already defined.
*
* @internal ICU 64
*/
#if !defined(UCONFIG_USE_WINDOWS_LCID_MAPPING_API)
# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1
#endif
/* i18n library switches ---------------------------------------------------- */
/**
* \def UCONFIG_NO_COLLATION
* Turns off collation and collation-based string search.
* Defaults to 0 unless a value was already defined.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_COLLATION)
# define UCONFIG_NO_COLLATION 0
#endif
/**
* \def UCONFIG_NO_FORMATTING
* Turns off formatting and calendar/timezone services.
* Defaults to 0 unless a value was already defined.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_FORMATTING)
# define UCONFIG_NO_FORMATTING 0
#endif
/**
* \def UCONFIG_NO_TRANSLITERATION
* Turns off transliteration.
* Defaults to 0 unless a value was already defined.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_TRANSLITERATION)
# define UCONFIG_NO_TRANSLITERATION 0
#endif
/**
* \def UCONFIG_NO_REGULAR_EXPRESSIONS
* Turns off regular expressions.
* Defaults to 0 unless a value was already defined.
*
* @stable ICU 2.4
*/
#if !defined(UCONFIG_NO_REGULAR_EXPRESSIONS)
# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
#endif
/**
* \def UCONFIG_NO_SERVICE
* Turns off service registration.
* Defaults to 0 unless a value was already defined.
*
* @stable ICU 3.2
*/
#if !defined(UCONFIG_NO_SERVICE)
# define UCONFIG_NO_SERVICE 0
#endif
/**
* \def UCONFIG_HAVE_PARSEALLINPUT
* Turns on the "parse all input" attribute. Binary incompatible.
* Defaults to 1 unless a value was already defined.
*
* @internal
*/
#if !defined(UCONFIG_HAVE_PARSEALLINPUT)
# define UCONFIG_HAVE_PARSEALLINPUT 1
#endif
/**
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
* Turns off the filtered break iteration code.
* Defaults to 0 unless a value was already defined.
*
* @internal
*/
#if !defined(UCONFIG_NO_FILTERED_BREAK_ITERATION)
# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
#endif
#endif // __UCONFIG_H__

View File

@@ -0,0 +1,159 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucpmap.h
// created: 2018sep03 Markus W. Scherer
#ifndef __UCPMAP_H__
#define __UCPMAP_H__
#include "unicode/utypes.h"
U_CDECL_BEGIN
/**
* \file
*
* This file defines an abstract map from Unicode code points to integer values.
*
* @see UCPMap
* @see UCPTrie
* @see UMutableCPTrie
*/
/**
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
*
* @see UCPTrie
* @see UMutableCPTrie
* @stable ICU 63
*/
typedef struct UCPMap UCPMap;
/**
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
* Most users should use UCPMAP_RANGE_NORMAL.
*
* @see ucpmap_getRange
* @see ucptrie_getRange
* @see umutablecptrie_getRange
* @stable ICU 63
*/
enum UCPMapRangeOption {
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
* Most users should use this option.
* @stable ICU 63
*/
UCPMAP_RANGE_NORMAL,
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
* surrogateValue, which is passed to getRange() as a separate parameter.
* The surrogateValue is not transformed via filter().
* See U_IS_LEAD(c).
*
* Most users should use UCPMAP_RANGE_NORMAL instead.
*
* This option is useful for maps that map surrogate code *units* to
* special values optimized for UTF-16 string processing
* or for special error behavior for unpaired surrogates,
* but those values are not to be associated with the lead surrogate code *points*.
* @stable ICU 63
*/
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
* except that all surrogates (U+D800..U+DFFF) are treated as having the
* surrogateValue, which is passed to getRange() as a separate parameter.
* The surrogateValue is not transformed via filter().
* See U_IS_SURROGATE(c).
*
* Most users should use UCPMAP_RANGE_NORMAL instead.
*
* This option is useful for maps that map surrogate code *units* to
* special values optimized for UTF-16 string processing
* or for special error behavior for unpaired surrogates,
* but those values are not to be associated with the lead surrogate code *points*.
* @stable ICU 63
*/
UCPMAP_RANGE_FIXED_ALL_SURROGATES
};
#ifndef U_IN_DOXYGEN
typedef enum UCPMapRangeOption UCPMapRangeOption;
#endif
/**
* Returns the value for a code point as stored in the map, with range checking.
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
*
* @param map the map
* @param c the code point
* @return the map value,
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
* @stable ICU 63
*/
U_CAPI uint32_t U_EXPORT2
ucpmap_get(const UCPMap *map, UChar32 c);
/**
* Callback function type: Modifies a map value.
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
* The modified value will be returned by the getRange function.
*
* Can be used to ignore some of the value bits,
* make a filter for one of several values,
* return a value index computed from the map value, etc.
*
* @param context an opaque pointer, as passed into the getRange function
* @param value a value from the map
* @return the modified value
* @stable ICU 63
*/
typedef uint32_t U_CALLCONV
UCPMapValueFilter(const void *context, uint32_t value);
/**
* Returns the last code point such that all those from start to there have the same value.
* Can be used to efficiently iterate over all same-value ranges in a map.
* (This is normally faster than iterating over code points and get()ting each value,
* but much slower than a data structure that stores ranges directly.)
*
* If the UCPMapValueFilter function pointer is not NULL, then
* the value to be delivered is passed through that function, and the return value is the end
* of the range where all values are modified to the same actual value.
* The value is unchanged if that function pointer is NULL.
*
* Example:
* \code
* UChar32 start = 0, end;
* uint32_t value;
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
* NULL, NULL, &value)) >= 0) {
* // Work with the range start..end and its value.
* start = end + 1;
* }
* \endcode
*
* @param map the map
* @param start range start
* @param option defines whether surrogates are treated normally,
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
* @param filter a pointer to a function that may modify the map data value,
* or NULL if the values from the map are to be used unmodified
* @param context an opaque pointer that is passed on to the filter function
* @param pValue if not NULL, receives the value that every code point start..end has;
* may have been modified by filter(context, map value)
* if that function pointer is not NULL
* @return the range end code point, or -1 if start is not a valid code point
* @stable ICU 63
*/
U_CAPI UChar32 U_EXPORT2
ucpmap_getRange(const UCPMap *map, UChar32 start,
UCPMapRangeOption option, uint32_t surrogateValue,
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
U_CDECL_END
#endif

View File

@@ -0,0 +1,209 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2002-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uenum.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:2
*
* created on: 2002jul08
* created by: Vladimir Weinstein
*/
#ifndef __UENUM_H
#define __UENUM_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"
U_NAMESPACE_BEGIN
class StringEnumeration;
U_NAMESPACE_END
#endif // U_SHOW_CPLUSPLUS_API
/**
* \file
* \brief C API: String Enumeration
*/
/**
* An enumeration object.
* For usage in C programs.
* @stable ICU 2.2
*/
struct UEnumeration;
/** structure representing an enumeration object instance @stable ICU 2.2 */
typedef struct UEnumeration UEnumeration;
/**
* Disposes of resources in use by the iterator. If en is NULL,
* does nothing. After this call, any char* or UChar* pointer
* returned by uenum_unext() or uenum_next() is invalid.
* @param en UEnumeration structure pointer
* @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uenum_close(UEnumeration* en);
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
* \class LocalUEnumerationPointer
* "Smart pointer" class, closes a UEnumeration via uenum_close().
* For most methods see the LocalPointerBase base class.
*
* @see LocalPointerBase
* @see LocalPointer
* @stable ICU 4.4
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
U_NAMESPACE_END
#endif
/**
* Returns the number of elements that the iterator traverses. If
* the iterator is out-of-sync with its service, status is set to
* U_ENUM_OUT_OF_SYNC_ERROR.
* This is a convenience function. It can end up being very
* expensive as all the items might have to be pre-fetched (depending
* on the type of data being traversed). Use with caution and only
* when necessary.
* @param en UEnumeration structure pointer
* @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
* iterator is out of sync.
* @return number of elements in the iterator
* @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
uenum_count(UEnumeration* en, UErrorCode* status);
/**
* Returns the next element in the iterator's list. If there are
* no more elements, returns NULL. If the iterator is out-of-sync
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
* NULL is returned. If the native service string is a char* string,
* it is converted to UChar* with the invariant converter.
* The result is terminated by (UChar)0.
* @param en the iterator object
* @param resultLength pointer to receive the length of the result
* (not including the terminating \\0).
* If the pointer is NULL it is ignored.
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service.
* @return a pointer to the string. The string will be
* zero-terminated. The return pointer is owned by this iterator
* and must not be deleted by the caller. The pointer is valid
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
* @stable ICU 2.2
*/
U_CAPI const UChar* U_EXPORT2
uenum_unext(UEnumeration* en,
int32_t* resultLength,
UErrorCode* status);
/**
* Returns the next element in the iterator's list. If there are
* no more elements, returns NULL. If the iterator is out-of-sync
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
* NULL is returned. If the native service string is a UChar*
* string, it is converted to char* with the invariant converter.
* The result is terminated by (char)0. If the conversion fails
* (because a character cannot be converted) then status is set to
* U_INVARIANT_CONVERSION_ERROR and the return value is undefined
* (but non-NULL).
* @param en the iterator object
* @param resultLength pointer to receive the length of the result
* (not including the terminating \\0).
* If the pointer is NULL it is ignored.
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service. Set to
* U_INVARIANT_CONVERSION_ERROR if the underlying native string is
* UChar* and conversion to char* with the invariant converter
* fails. This error pertains only to current string, so iteration
* might be able to continue successfully.
* @return a pointer to the string. The string will be
* zero-terminated. The return pointer is owned by this iterator
* and must not be deleted by the caller. The pointer is valid
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
* @stable ICU 2.2
*/
U_CAPI const char* U_EXPORT2
uenum_next(UEnumeration* en,
int32_t* resultLength,
UErrorCode* status);
/**
* Resets the iterator to the current list of service IDs. This
* re-establishes sync with the service and rewinds the iterator
* to start at the first element.
* @param en the iterator object
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service.
* @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uenum_reset(UEnumeration* en, UErrorCode* status);
#if U_SHOW_CPLUSPLUS_API
/**
* Given a StringEnumeration, wrap it in a UEnumeration. The
* StringEnumeration is adopted; after this call, the caller must not
* delete it (regardless of error status).
* @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
* @param ec the error code.
* @return a UEnumeration wrapping the adopted StringEnumeration.
* @stable ICU 4.2
*/
U_CAPI UEnumeration* U_EXPORT2
uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
#endif
/**
* Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
* \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
* @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
* @param count length of the array
* @param ec error code
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
* @see uenum_close
* @stable ICU 50
*/
U_CAPI UEnumeration* U_EXPORT2
uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
UErrorCode* ec);
/**
* Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
* \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
* @param strings array of const char* strings (each null terminated). All storage is owned by the caller.
* @param count length of the array
* @param ec error code
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
* @see uenum_close
* @stable ICU 50
*/
U_CAPI UEnumeration* U_EXPORT2
uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
UErrorCode* ec);
#endif

View File

@@ -0,0 +1,709 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2002-2011 International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uiter.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jan18
* created by: Markus W. Scherer
*/
#ifndef __UITER_H__
#define __UITER_H__
/**
* \file
* \brief C API: Unicode Character Iteration
*
* @see UCharIterator
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
class CharacterIterator;
class Replaceable;
U_NAMESPACE_END
#endif
U_CDECL_BEGIN
struct UCharIterator;
typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
/**
* Origin constants for UCharIterator.getIndex() and UCharIterator.move().
* Each constant names the reference point that an index is requested for
* or that a move is relative to:
* - UITER_START: the start of the iteration range
* - UITER_CURRENT: the current position
* - UITER_LIMIT: the limit of the iteration range
* - UITER_ZERO: index 0; move(iter, index, UITER_ZERO) works like
* CharacterIterator::setIndex(index)
* - UITER_LENGTH: the length (NOTE(review): presumably the length of the
* underlying text in UChars - confirm)
*
* getIndex() may be slow for UITER_LENGTH, and move() may be slow relative
* to UITER_LENGTH, when the underlying storage is not UTF-16.
* @see UCharIteratorMove
* @see UCharIterator
* @stable ICU 2.1
*/
typedef enum UCharIteratorOrigin {
UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
} UCharIteratorOrigin;
/** Constants for UCharIterator. @stable ICU 2.6 */
enum {
/**
* Constant value that may be returned by UCharIteratorMove
* indicating that the final UTF-16 index is not known, but that the move succeeded.
* This can occur when moving relative to limit or length, or
* when moving relative to the current index after a setState()
* when the current UTF-16 index is not known.
*
* It would be very inefficient to have to count from the beginning of the text
* just to get the current/limit/length index after moving relative to it.
* The actual index can be determined with getIndex(UITER_CURRENT)
* which will count the UChars if necessary.
*
* @stable ICU 2.6
*/
UITER_UNKNOWN_INDEX=-2
};
/**
* Constant for UCharIterator getState() indicating an error or
* an unknown state.
* Returned by uiter_getState()/UCharIteratorGetState
* when an error occurs.
* Also, some UCharIterator implementations may not be able to return
* a valid state for each position. This will be clearly documented
* for each such iterator (none of the public ones here).
*
* @stable ICU 2.6
*/
#define UITER_NO_STATE ((uint32_t)0xffffffff)
/**
* Function type declaration for UCharIterator.getIndex().
*
* Gets the current position, or the start or limit of the
* iteration range.
*
* This function may perform slowly for UITER_CURRENT after setState() was called,
* or for UITER_LENGTH, because an iterator implementation may have to count
* UChars if the underlying storage is not UTF-16.
*
* @param iter the UCharIterator structure ("this pointer")
* @param origin get the 0, start, limit, length, or current index
* @return the requested index, or U_SENTINEL in an error condition
*
* @see UCharIteratorOrigin
* @see UCharIterator
* @stable ICU 2.1
*/
typedef int32_t U_CALLCONV
UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
/**
* Function type declaration for UCharIterator.move().
*
* Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
*
* Moves the current position relative to the start or limit of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code units forward
* or backward by specifying a positive or negative delta.
* Out of bounds movement will be pinned to the start or limit.
*
* This function may perform slowly for moving relative to UITER_LENGTH
* because an iterator implementation may have to count the rest of the
* UChars if the native storage is not UTF-16.
*
* When moving relative to the limit or length, or
* relative to the current position after setState() was called,
* move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
* determination of the actual UTF-16 index.
* The actual index can be determined with getIndex(UITER_CURRENT)
* which will count the UChars if necessary.
* See UITER_UNKNOWN_INDEX for details.
*
* @param iter the UCharIterator structure ("this pointer")
* @param delta can be positive, zero, or negative
* @param origin move relative to the 0, start, limit, length, or current index
* @return the new index, or U_SENTINEL on an error condition,
* or UITER_UNKNOWN_INDEX when the index is not known.
*
* @see UCharIteratorOrigin
* @see UCharIterator
* @see UITER_UNKNOWN_INDEX
* @stable ICU 2.1
*/
typedef int32_t U_CALLCONV
UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
/**
* Function type declaration for UCharIterator.hasNext().
*
* Check if current() and next() can still
* return another code unit.
*
* @param iter the UCharIterator structure ("this pointer")
* @return boolean value for whether current() and next() can still return another code unit
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UBool U_CALLCONV
UCharIteratorHasNext(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.hasPrevious().
*
* Check if previous() can still return another code unit.
*
* @param iter the UCharIterator structure ("this pointer")
* @return boolean value for whether previous() can still return another code unit
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UBool U_CALLCONV
UCharIteratorHasPrevious(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.current().
*
* Return the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code unit
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UChar32 U_CALLCONV
UCharIteratorCurrent(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.next().
*
* Return the code unit at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code unit (and post-increment the current index)
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UChar32 U_CALLCONV
UCharIteratorNext(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.previous().
*
* Decrement the index and return the code unit from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the previous code unit (after pre-decrementing the current index)
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UChar32 U_CALLCONV
UCharIteratorPrevious(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.reservedFn().
* Reserved for future use.
*
* @param iter the UCharIterator structure ("this pointer")
* @param something some integer argument
* @return some integer
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef int32_t U_CALLCONV
UCharIteratorReserved(UCharIterator *iter, int32_t something);
/**
* Function type declaration for UCharIterator.getState().
*
* Get the "state" of the iterator in the form of a single 32-bit word.
* It is recommended that the state value be calculated to be as small as
* is feasible. For strings with limited lengths, fewer than 32 bits may
* be sufficient.
*
* This is used together with setState()/UCharIteratorSetState
* to save and restore the iterator position more efficiently than with
* getIndex()/move().
*
* The iterator state is defined as a uint32_t value because it is designed
* for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
* of the character iterator.
*
* With some UCharIterator implementations (e.g., UTF-8),
* getting and setting the UTF-16 index with existing functions
* (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
* relatively slow because the iterator has to "walk" from a known index
* to the requested one.
* This takes more time the farther it needs to go.
*
* An opaque state value allows an iterator implementation to provide
* an internal index (UTF-8: the source byte array index) for
* fast, constant-time restoration.
*
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
* the UTF-16 index may not be restored as well, but the iterator can deliver
* the correct text contents and move relative to the current position
* without performance degradation.
*
* Some UCharIterator implementations may not be able to return
* a valid state for each position, in which case they return UITER_NO_STATE instead.
* This will be clearly documented for each such iterator (none of the public ones here).
*
* The UCharIterator.getState pointer is NULL if an iterator does not
* implement this function. The convenience function uiter_getState()
* checks for that and returns UITER_NO_STATE in that case.
*
* @param iter the UCharIterator structure ("this pointer")
* @return the state word
*
* @see UCharIterator
* @see UCharIteratorSetState
* @see uiter_getState
* @see UITER_NO_STATE
* @stable ICU 2.6
*/
typedef uint32_t U_CALLCONV
UCharIteratorGetState(const UCharIterator *iter);
/**
* Function type declaration for UCharIterator.setState().
*
* Restore the "state" of the iterator using a state word from a getState() call.
* The iterator object need not be the same one as for which getState() was called,
* but it must be of the same type (set up using the same uiter_setXYZ function)
* and it must iterate over the same string
* (binary identical regardless of memory address).
* For more about the state word see UCharIteratorGetState.
*
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
* the UTF-16 index may not be restored as well, but the iterator can deliver
* the correct text contents and move relative to the current position
* without performance degradation.
*
* The UCharIterator.setState pointer is NULL if an iterator does not
* implement this function. The convenience function uiter_setState()
* checks for that and sets U_UNSUPPORTED_ERROR in that case.
*
* @param iter the UCharIterator structure ("this pointer")
* @param state the state word from a getState() call
* on a same-type, same-string iterator
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see UCharIterator
* @see UCharIteratorGetState
* @see uiter_setState
* @stable ICU 2.6
*/
typedef void U_CALLCONV
UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
/**
* C API for code unit iteration.
* This can be used as a C wrapper around
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
*
* There are two roles for using UCharIterator:
*
* A "provider" sets the necessary function pointers and controls the "protected"
* fields of the UCharIterator structure. A "provider" passes a UCharIterator
* into C APIs that need a UCharIterator as an abstract, flexible string interface.
*
* Implementations of such C APIs are "callers" of UCharIterator functions;
* they only use the "public" function pointers and never access the "protected"
* fields directly.
*
* The current() and next() functions only check the current index against the
* limit, and previous() only checks the current index against the start,
* to see if the iterator already reached the end of the iteration range.
*
* The assumption - in all iterators - is that the index is moved via the API,
* which means it won't go out of bounds, or the index is modified by
* user code that knows enough about the iterator implementation to set valid
* index values.
*
* UCharIterator functions return code unit values 0..0xffff,
* or U_SENTINEL if the iteration bounds are reached.
*
* @stable ICU 2.1
*/
struct UCharIterator {
/**
* (protected) Pointer to string or wrapped object or similar.
* Not used by caller.
* @stable ICU 2.1
*/
const void *context;
/**
* (protected) Length of string or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t length;
/**
* (protected) Start index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t start;
/**
* (protected) Current index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t index;
/**
* (protected) Limit index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t limit;
/**
* (protected) Used by UTF-8 iterators and possibly others.
* @stable ICU 2.1
*/
int32_t reservedField;
/**
* (public) Returns the current position or the
* start or limit index of the iteration range.
*
* @see UCharIteratorGetIndex
* @stable ICU 2.1
*/
UCharIteratorGetIndex *getIndex;
/**
* (public) Moves the current position relative to the start or limit of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code units forward
* or backward by specifying a positive or negative delta.
*
* @see UCharIteratorMove
* @stable ICU 2.1
*/
UCharIteratorMove *move;
/**
* (public) Check if current() and next() can still
* return another code unit.
*
* @see UCharIteratorHasNext
* @stable ICU 2.1
*/
UCharIteratorHasNext *hasNext;
/**
* (public) Check if previous() can still return another code unit.
*
* @see UCharIteratorHasPrevious
* @stable ICU 2.1
*/
UCharIteratorHasPrevious *hasPrevious;
/**
* (public) Return the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
*
* @see UCharIteratorCurrent
* @stable ICU 2.1
*/
UCharIteratorCurrent *current;
/**
* (public) Return the code unit at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @see UCharIteratorNext
* @stable ICU 2.1
*/
UCharIteratorNext *next;
/**
* (public) Decrement the index and return the code unit from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @see UCharIteratorPrevious
* @stable ICU 2.1
*/
UCharIteratorPrevious *previous;
/**
* (public) Reserved for future use. Currently NULL.
*
* @see UCharIteratorReserved
* @stable ICU 2.1
*/
UCharIteratorReserved *reservedFn;
/**
* (public) Return the state of the iterator, to be restored later with setState().
* This function pointer is NULL if the iterator does not implement it.
*
* @see UCharIteratorGetState
* @stable ICU 2.6
*/
UCharIteratorGetState *getState;
/**
* (public) Restore the iterator state from the state word from a call
* to getState().
* This function pointer is NULL if the iterator does not implement it.
*
* @see UCharIteratorSetState
* @stable ICU 2.6
*/
UCharIteratorSetState *setState;
};
/**
* Helper function for UCharIterator to get the code point
* at the current index.
*
* Return the code point that includes the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
* If the current code unit is a lead or trail surrogate,
* then the following or preceding surrogate is used to form
* the code point value.
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code point
*
* @see UCharIterator
* @see U16_GET
* @see UnicodeString::char32At()
* @stable ICU 2.1
*/
U_CAPI UChar32 U_EXPORT2
uiter_current32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the next code point.
*
* Return the code point at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code point (and post-increment the current index)
*
* @see UCharIterator
* @see U16_NEXT
* @stable ICU 2.1
*/
U_CAPI UChar32 U_EXPORT2
uiter_next32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the previous code point.
*
* Decrement the index and return the code point from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the previous code point (after pre-decrementing the current index)
*
* @see UCharIterator
* @see U16_PREV
* @stable ICU 2.1
*/
U_CAPI UChar32 U_EXPORT2
uiter_previous32(UCharIterator *iter);
/**
* Get the "state" of the iterator in the form of a single 32-bit word.
* This is a convenience function that calls iter->getState(iter)
* if iter->getState is not NULL;
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
*
* Some UCharIterator implementations may not be able to return
* a valid state for each position, in which case they return UITER_NO_STATE instead.
* This will be clearly documented for each such iterator (none of the public ones here).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the state word
*
* @see UCharIterator
* @see UCharIteratorGetState
* @see UITER_NO_STATE
* @stable ICU 2.6
*/
U_CAPI uint32_t U_EXPORT2
uiter_getState(const UCharIterator *iter);
/**
* Restore the "state" of the iterator using a state word from a getState() call.
* This is a convenience function that calls iter->setState(iter, state, pErrorCode)
* if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
*
* @param iter the UCharIterator structure ("this pointer")
* @param state the state word from a getState() call
* on a same-type, same-string iterator
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see UCharIterator
* @see UCharIteratorSetState
* @stable ICU 2.6
*/
U_CAPI void U_EXPORT2
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
/**
* Set up a UCharIterator to iterate over a string.
*
* Sets the UCharIterator function pointers for iteration over the string s
* with iteration boundaries start=index=0 and length=limit=string length.
* The "provider" may set the start, index, and limit values at any time
* within the range 0..length.
* The length field will be ignored.
*
* The string pointer s is set into UCharIterator.context without copying
* or reallocating the string contents.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param s String to iterate over
* @param length Length of s, or -1 if NUL-terminated
*
* @see UCharIterator
* @stable ICU 2.1
*/
U_CAPI void U_EXPORT2
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
/**
* Set up a UCharIterator to iterate over a UTF-16BE string
* (byte vector with a big-endian pair of bytes per UChar).
*
* Everything works just like with a normal UChar iterator (uiter_setString),
* except that UChars are assembled from byte pairs,
* and that the length argument here indicates an even number of bytes.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param s UTF-16BE string to iterate over
* @param length Length of s as an even number of bytes, or -1 if NUL-terminated
* (NUL means pair of 0 bytes at even index from s)
*
* @see UCharIterator
* @see uiter_setString
* @stable ICU 2.6
*/
U_CAPI void U_EXPORT2
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
/**
* Set up a UCharIterator to iterate over a UTF-8 string.
*
* Sets the UCharIterator function pointers for iteration over the UTF-8 string s
* with UTF-8 iteration boundaries 0 and length.
* The implementation counts the UTF-16 index on the fly and
* lazily evaluates the UTF-16 length of the text.
*
* The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
* When the reservedField is not 0, then it contains a supplementary code point
* and the UTF-16 index is between the two corresponding surrogates.
* At that point, the UTF-8 index is behind that code point.
*
* The UTF-8 string pointer s is set into UCharIterator.context without copying
* or reallocating the string contents.
*
* getState() returns a state value consisting of
* - the current UTF-8 source byte index (bits 31..1)
* - a flag (bit 0) that indicates whether the UChar position is in the middle
* of a surrogate pair
* (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
*
* getState() cannot also encode the UTF-16 index in the state value.
* move(relative to limit or length), or
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
*
* @param iter UCharIterator structure to be set for iteration
* @param s UTF-8 string to iterate over
* @param length Length of s in bytes, or -1 if NUL-terminated
*
* @see UCharIterator
* @stable ICU 2.6
*/
U_CAPI void U_EXPORT2
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
#if U_SHOW_CPLUSPLUS_API
/**
* Set up a UCharIterator to wrap around a C++ CharacterIterator.
*
* Sets the UCharIterator function pointers for iteration using the
* CharacterIterator charIter.
*
* The CharacterIterator pointer charIter is set into UCharIterator.context
* without copying or cloning the CharacterIterator object.
* The other "protected" UCharIterator fields are set to 0 and will be ignored.
* The iteration index and boundaries are controlled by the CharacterIterator.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param charIter CharacterIterator to wrap
*
* @see UCharIterator
* @stable ICU 2.1
*/
U_CAPI void U_EXPORT2
uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
/**
* Set up a UCharIterator to iterate over a C++ Replaceable.
*
* Sets the UCharIterator function pointers for iteration over the
* Replaceable rep with iteration boundaries start=index=0 and
* length=limit=rep->length().
* The "provider" may set the start, index, and limit values at any time
* within the range 0..length=rep->length().
* The length field will be ignored.
*
* The Replaceable pointer rep is set into UCharIterator.context without copying
* or cloning/reallocating the Replaceable object.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param rep Replaceable to iterate over
*
* @see UCharIterator
* @stable ICU 2.1
*/
U_CAPI void U_EXPORT2
uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
#endif
U_CDECL_END
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,491 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: umachine.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*
* This file defines basic types and constants for ICU to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*/
#ifndef __UMACHINE_H__
#define __UMACHINE_H__
/**
* \file
* \brief Basic types and constants for UTF
*
* <h2> Basic types and constants for UTF </h2>
* This file defines basic types and constants for utf.h to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*
*/
/*==========================================================================*/
/* Include platform-dependent definitions */
/* which are contained in the platform-specific file platform.h */
/*==========================================================================*/
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
/*
* ANSI C headers:
* stddef.h defines wchar_t
*/
#include <stdbool.h>
#include <stddef.h>
/*==========================================================================*/
/* For C wrappers, we use the symbol U_CAPI. */
/* This works properly if the includer is C or C++. */
/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
/*==========================================================================*/
/**
* \def U_CFUNC
* This is used in a declaration of a library private ICU C function.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_BEGIN
* This is used to begin a declaration of a library private ICU C API.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_END
* This is used to end a declaration of a library private ICU C API
* @stable ICU 2.4
*/
/* Pick the linkage keywords: plain C needs only "extern", C++ needs extern "C". */
#if !defined(__cplusplus)
/* Compiled as C: no linkage block is opened or closed. */
#   define U_CFUNC extern
#   define U_CDECL_BEGIN
#   define U_CDECL_END
#else
/* Compiled as C++: give the wrapped declarations C linkage. */
#   define U_CFUNC extern "C"
#   define U_CDECL_BEGIN extern "C" {
#   define U_CDECL_END }
#endif
#ifndef U_ATTRIBUTE_DEPRECATED
/**
* \def U_ATTRIBUTE_DEPRECATED
* Compiler-specific "deprecated" attribute; U_DEPRECATED below appends it
* to U_CAPI. A definition supplied before this header is included is kept.
* This is used for GCC specific attributes
* @internal
*/
// NOTE(review): U_GCC_MAJOR_MINOR is not defined in this header; presumably it
// comes from the platform headers included above (302 looks like GCC 3.2) - confirm.
#if U_GCC_MAJOR_MINOR >= 302
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
/**
* \def U_ATTRIBUTE_DEPRECATED
* This is used for Visual C++ specific attributes
* @internal
*/
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
#else
// Neither compiler test matched: the macro expands to nothing.
# define U_ATTRIBUTE_DEPRECATED
#endif
#endif
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
#define U_CAPI U_CFUNC U_EXPORT
/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
#define U_STABLE U_CAPI
/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
#define U_DRAFT U_CAPI
/** This is used to declare a function as a deprecated public ICU C API */
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
#define U_OBSOLETE U_CAPI
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
#define U_INTERNAL U_CAPI
/**
* \def U_OVERRIDE
* Defined to the C++11 "override" keyword if available.
* Denotes a class or member which is an override of the base class.
* May result in an error if it is applied to something not an override.
* A definition supplied before this header is included is left unchanged.
* @internal
*/
#ifndef U_OVERRIDE
#define U_OVERRIDE override
#endif
/**
* \def U_FINAL
* Defined to the C++11 "final" keyword if available.
* Denotes a class or member which may not be overridden in subclasses.
* May result in an error if subclasses attempt to override.
* A definition supplied before this header is included is left unchanged,
* except when U_IN_DOXYGEN is defined: then it is defined here regardless.
* @internal
*/
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
#define U_FINAL final
#endif
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
// series of statements wrapped in { } blocks and the caller could choose to
// either treat them as if they were actual functions and end the invocation
// with a trailing ; creating an empty statement after the block or else omit
// this trailing ; using the knowledge that the macro would expand to { }.
//
// But doing so doesn't work well with macros that look like functions and
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
// switches to the standard solution of wrapping such macros in do { } while.
//
// This will however break existing code that depends on being able to invoke
// these macros without a trailing ; so to be able to remain compatible with
// such code the wrapper is itself defined as macros so that it's possible to
// build ICU 65 and later with the old macro behaviour, like this:
//
// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
// runConfigureICU ...
//
/**
* \def UPRV_BLOCK_MACRO_BEGIN
* Opening half of the wrapper around multi-statement macros.
* Expands to the "do" keyword unless the build has already defined it.
* @internal
*/
#if !defined(UPRV_BLOCK_MACRO_BEGIN)
#   define UPRV_BLOCK_MACRO_BEGIN do
#endif
/**
* \def UPRV_BLOCK_MACRO_END
* Closing half of the wrapper around multi-statement macros.
* Expands to "while (false)" unless the build has already defined it.
* @internal
*/
#if !defined(UPRV_BLOCK_MACRO_END)
#   define UPRV_BLOCK_MACRO_END while (false)
#endif
/*==========================================================================*/
/* limits for int32_t etc., like in POSIX inttypes.h */
/*==========================================================================*/
/* Each limit below is only a fallback: a definition that already exists wins. */
#if !defined(INT8_MIN)
/** Lower bound of the signed 8-bit integer range. @stable ICU 2.0 */
#   define INT8_MIN ((int8_t)(-128))
#endif
#if !defined(INT16_MIN)
/** Lower bound of the signed 16-bit integer range. @stable ICU 2.0 */
#   define INT16_MIN ((int16_t)(-32767-1))
#endif
#if !defined(INT32_MIN)
/** Lower bound of the signed 32-bit integer range. @stable ICU 2.0 */
#   define INT32_MIN ((int32_t)(-2147483647-1))
#endif
#if !defined(INT8_MAX)
/** Upper bound of the signed 8-bit integer range. @stable ICU 2.0 */
#   define INT8_MAX ((int8_t)(127))
#endif
#if !defined(INT16_MAX)
/** Upper bound of the signed 16-bit integer range. @stable ICU 2.0 */
#   define INT16_MAX ((int16_t)(32767))
#endif
#if !defined(INT32_MAX)
/** Upper bound of the signed 32-bit integer range. @stable ICU 2.0 */
#   define INT32_MAX ((int32_t)(2147483647))
#endif
#if !defined(UINT8_MAX)
/** Upper bound of the unsigned 8-bit integer range. @stable ICU 2.0 */
#   define UINT8_MAX ((uint8_t)(255U))
#endif
#if !defined(UINT16_MAX)
/** Upper bound of the unsigned 16-bit integer range. @stable ICU 2.0 */
#   define UINT16_MAX ((uint16_t)(65535U))
#endif
#if !defined(UINT32_MAX)
/** Upper bound of the unsigned 32-bit integer range. @stable ICU 2.0 */
#   define UINT32_MAX ((uint32_t)(4294967295U))
#endif
#if !defined(U_INT64_T_UNAVAILABLE)
#   if !defined(INT64_C)
/**
* Platform-independent spelling of a signed 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
* @stable ICU 2.8
*/
#       define INT64_C(c) c ## LL
#   endif
#   if !defined(UINT64_C)
/**
* Platform-independent spelling of an unsigned 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
* @stable ICU 2.8
*/
#       define UINT64_C(c) c ## ULL
#   endif
#   if !defined(U_INT64_MIN)
/** Lower bound of the signed 64-bit integer range. @stable ICU 2.8 */
#       define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
#   endif
#   if !defined(U_INT64_MAX)
/** Upper bound of the signed 64-bit integer range. @stable ICU 2.8 */
#       define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
#   endif
#   if !defined(U_UINT64_MAX)
/** Upper bound of the unsigned 64-bit integer range. @stable ICU 2.8 */
#       define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
#   endif
#else
/* The build declared that no 64-bit integer type exists: stop here. */
#   error int64_t is required for decimal format and rule-based number format.
#endif
/*==========================================================================*/
/* Boolean data type */
/*==========================================================================*/
/**
* The ICU boolean type, a signed-byte integer.
* ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
* Also provides a fixed type definition, as opposed to
* type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
*
* @stable ICU 2.0
*/
typedef int8_t UBool;
/**
* \def U_DEFINE_FALSE_AND_TRUE
* Controls whether public ICU headers define the macros FALSE=0 & TRUE=1;
* normally this is turned off.
* These obsolete macros sometimes break compilation of other code that
* uses the same names for enum constants or similar.
* C++ has long defined bool/false/true, and C99 added its own definitions,
* although as macros; see stdbool.h.
*
* While migrating code you may transitionally define U_DEFINE_FALSE_AND_TRUE=1.
*
* @internal ICU 68
*/
#if defined(U_DEFINE_FALSE_AND_TRUE)
// A value was supplied from outside: keep it.
#elif defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
      defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
      defined(U_TOOLUTIL_IMPLEMENTATION) || defined(U_COMBINED_IMPLEMENTATION)
// Building ICU itself: FALSE & TRUE stay available.
#   define U_DEFINE_FALSE_AND_TRUE 1
#else
// Client code: do not collide with non-macro definitions of FALSE & TRUE.
#   define U_DEFINE_FALSE_AND_TRUE 0
#endif
#if defined(U_IN_DOXYGEN) || U_DEFINE_FALSE_AND_TRUE
#   if !defined(TRUE)
/**
* The TRUE value of a UBool.
*
* @deprecated ICU 68 Use standard "true" instead.
*/
#       define TRUE 1
#   endif
#   if !defined(FALSE)
/**
* The FALSE value of a UBool.
*
* @deprecated ICU 68 Use standard "false" instead.
*/
#       define FALSE 0
#   endif
#endif // U_DEFINE_FALSE_AND_TRUE
/*==========================================================================*/
/* Unicode data types */
/*==========================================================================*/
/* wchar_t-related definitions -------------------------------------------- */
/*
* \def U_WCHAR_IS_UTF16
* Defined if wchar_t uses UTF-16.
*
* @stable ICU 2.0
*/
/*
* \def U_WCHAR_IS_UTF32
* Defined if wchar_t uses UTF-32.
*
* @stable ICU 2.0
*/
/*
* Auto-detect the wchar_t encoding, unless the build has already defined
* U_WCHAR_IS_UTF16 or U_WCHAR_IS_UTF32. The branches are tried in order and
* at most one of the two macros gets defined; if no branch matches, neither
* macro is defined.
*/
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
/* 1. __STDC_ISO_10646__ is predefined: choose purely by the size of wchar_t. */
# ifdef __STDC_ISO_10646__
# if (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# elif (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
/* 2. __UCS2__ is predefined: UTF-16, but only for platform IDs in the range
* U_PF_OS390..U_PF_OS400 with a 2-byte wchar_t. */
# elif defined __UCS2__
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# endif
/* 3. __UCS4__ is predefined, or the platform is U_PF_OS400 with __UTF32__:
* UTF-32 if wchar_t is 4 bytes. */
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
# if (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
/* 4. Darwin-based platform, or Linux-based platform with a 4-byte wchar_t: UTF-32. */
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
# define U_WCHAR_IS_UTF32
/* 5. Platform offers the Win32 API: UTF-16. */
# elif U_PLATFORM_HAS_WIN32_API
# define U_WCHAR_IS_UTF16
# endif
#endif
/* UChar and UChar32 definitions -------------------------------------------- */
/** Number of bytes in a UChar. @stable ICU 2.0 */
#define U_SIZEOF_UCHAR 2
/**
* \def U_CHAR16_IS_TYPEDEF
* If 1, then char16_t is a typedef and not a real type (yet)
* Always defined by the chain below, to either 1 or 0.
* @internal
*/
// NOTE(review): U_PLATFORM, U_PF_AIX and U_CPLUSPLUS_VERSION are not defined in this
// header; presumably they come from the platform headers pulled in via ptypes.h - confirm.
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
#else
// All other compilers/platforms: char16_t is treated as a real type.
# define U_CHAR16_IS_TYPEDEF 0
#endif
/**
* \var UChar
*
* The base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
*
* UChar is configurable by defining the macro UCHAR_TYPE
* on the preprocessor or compiler command line:
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
*
* The default is UChar=char16_t.
*
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
*
* In C, char16_t is a simple typedef of uint_least16_t.
* ICU requires uint_least16_t=uint16_t for data memory mapping.
* On macOS, char16_t is not available because the uchar.h standard header is missing.
*
* @stable ICU 4.4
*/
// Configuration-testing switch: with "#if 1" the #elif branch below is never
// taken, so this block does not define UCHAR_TYPE.
#if 1
// #if 1 is normal. UChar defaults to char16_t in C++.
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
// The intltest Makefile #defines UCHAR_TYPE=char16_t,
// so we only #define it to uint16_t if it is undefined so far.
#elif !defined(UCHAR_TYPE)
# define UCHAR_TYPE uint16_t
#endif
// Select the UChar type. The first matching branch wins:
//   1. one of the ICU library implementation macros tested below is defined: char16_t
//   2. UCHAR_TYPE is defined: that type
//   3. U_CPLUSPLUS_VERSION >= 11: char16_t
//   4. otherwise: uint16_t
#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
// Inside the ICU library code, never configurable.
typedef char16_t UChar;
#elif defined(UCHAR_TYPE)
typedef UCHAR_TYPE UChar;
#elif (U_CPLUSPLUS_VERSION >= 11)
typedef char16_t UChar;
#else
typedef uint16_t UChar;
#endif
/**
* \var OldUChar
* Default ICU 58 definition of UChar.
* A base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
*
* Define OldUChar to be wchar_t if that is 16 bits wide.
* If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__
* if the compiler predefines that macro, or else uint16_t.
*
* This makes the definition of OldUChar platform-dependent
* but allows direct string type compatibility with platforms with
* 16-bit wchar_t types.
*
* This is how UChar was defined in ICU 58, for transition convenience.
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
*
* @stable ICU 59
*/
#if U_SIZEOF_WCHAR_T==2
typedef wchar_t OldUChar;
#elif defined(__CHAR16_TYPE__)
typedef __CHAR16_TYPE__ OldUChar;
#else
typedef uint16_t OldUChar;
#endif
/**
* Define UChar32 as a type for single Unicode code points.
* UChar32 is a signed 32-bit integer (same as int32_t).
*
* The Unicode code point range is 0..0x10ffff.
* All other values (negative or >=0x110000) are illegal as Unicode code points.
* They may be used as sentinel values to indicate "done", "error"
* or similar non-code point conditions.
*
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
* or else to be uint32_t.
* That is, the definition of UChar32 was platform-dependent.
*
* @see U_SENTINEL
* @stable ICU 2.4
*/
typedef int32_t UChar32;
/**
* This value is intended for sentinel values for APIs that
* (take or) return single code points (UChar32).
* It is outside of the Unicode code point range 0..0x10ffff.
*
* For example, a "done" or "error" value in a new API
* could be indicated with U_SENTINEL.
*
* ICU APIs designed before ICU 2.4 usually define service-specific "done"
* values, mostly 0xffff.
* Those may need to be distinguished from
* actual U+ffff text contents by calling functions like
* CharacterIterator::hasNext() or UnicodeString::length().
*
* @return -1
* @see UChar32
* @stable ICU 2.4
*/
#define U_SENTINEL (-1)
#include "unicode/urename.h"
#endif

View File

@@ -0,0 +1,62 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2006, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: umisc.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999oct15
* created by: Markus W. Scherer
*/
#ifndef UMISC_H
#define UMISC_H
#include "unicode/utypes.h"
/**
* \file
* \brief C API:misc definitions
*
* This file contains miscellaneous definitions for the C APIs.
*/
U_CDECL_BEGIN
/**
* A struct representing a range of text containing a specific field.
* It holds the field value together with the begin and limit indexes
* of the text range.
* @stable ICU 2.0
*/
typedef struct UFieldPosition {
/**
* The field
* NOTE(review): the set of valid field values is not defined in this header;
* presumably it is defined by the APIs that take a UFieldPosition - confirm.
* @stable ICU 2.0
*/
int32_t field;
/**
* The start of the text range containing field
* @stable ICU 2.0
*/
int32_t beginIndex;
/**
* The limit of the text range containing field
* @stable ICU 2.0
*/
int32_t endIndex;
} UFieldPosition;
#if !UCONFIG_NO_SERVICE
/**
* Opaque type returned by registerInstance, registerFactory and unregister for service registration.
* @stable ICU 2.6
*/
typedef const void* URegistryKey;
#endif
U_CDECL_END
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,324 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: uobject.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jun26
* created by: Markus W. Scherer
*/
#ifndef __UOBJECT_H__
#define __UOBJECT_H__
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/platform.h"
/**
* \file
* \brief C++ API: Common ICU base class UObject.
*/
/**
 * \def U_NO_THROW
 * Legacy name; since ICU 64, use U_NOEXCEPT instead.
 *
 * Historically this was defined to supply the throw() specification so that
 * certain functions are declared as not throwing any exceptions.
 *
 * The UMemory operator new methods should carry that specification so the
 * compiler inserts an additional NULL check before calling constructors.
 * Without it, if <code>operator new</code> returns NULL the constructor is
 * still called, and if the constructor references member data (which it
 * typically does), the result is a segmentation violation.
 *
 * A definition supplied before this point is left untouched; otherwise the
 * macro simply forwards to U_NOEXCEPT.
 *
 * @stable ICU 4.2. Since ICU 64, Use U_NOEXCEPT instead. See ICU-20422.
 */
#if !defined(U_NO_THROW)
#   define U_NO_THROW U_NOEXCEPT
#endif
/*===========================================================================*/
/* UClassID-based RTTI */
/*===========================================================================*/
/**
 * UClassID identifies classes without relying on the compiler's RTTI.
 * It dates from the time before C++ compilers consistently supported RTTI;
 * ICU 4.6 requires compiler RTTI to be turned on.
 *
 * Every class hierarchy that needs to implement a polymorphic clone() or
 * operator==() defines the two methods described below. UClassID values
 * may be compared with operator==(); nothing else should be done with them.
 *
 * \par
 * In class hierarchies that implement "poor man's RTTI",
 * each concrete subclass implements getDynamicClassID() in the same way:
 *
 * \code
 *      class Derived {
 *      public:
 *          virtual UClassID getDynamicClassID() const
 *            { return Derived::getStaticClassID(); }
 *      };
 * \endcode
 *
 * Each concrete class also implements getStaticClassID(), which lets
 * clients test for a specific type.
 *
 * \code
 *      class Derived {
 *      public:
 *          static UClassID U_EXPORT2 getStaticClassID();
 *      private:
 *          static char fgClassID;
 *      };
 *
 *      // In Derived.cpp:
 *      UClassID Derived::getStaticClassID()
 *        { return (UClassID)&Derived::fgClassID; }
 *      char Derived::fgClassID = 0; // Value is irrelevant
 * \endcode
 * @stable ICU 2.0
 */
using UClassID = void*;
U_NAMESPACE_BEGIN
/**
* UMemory is the common ICU base class.
* All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
*
* This is primarily to make it possible and simple to override the
* C++ memory management by adding new/delete operators to this base class.
*
* To override ALL ICU memory management, including that from plain C code,
* replace the allocation functions declared in cmemory.h
*
* UMemory does not contain any virtual functions.
* Common "boilerplate" functions are defined in UObject.
*
* The allocation operators below are declared only when
* U_OVERRIDE_CXX_ALLOCATION is non-zero. Inside that section, the placement
* forms additionally require U_HAVE_PLACEMENT_NEW and the file/line forms
* additionally require U_HAVE_DEBUG_LOCATION_NEW.
*
* @stable ICU 2.4
*/
class U_COMMON_API UMemory {
public:
/* test versions for debugging shaper heap memory problems */
/* (these three helpers are declared only when SHAPER_MEMORY_DEBUG is defined) */
#ifdef SHAPER_MEMORY_DEBUG
static void * NewArray(int size, int count);
static void * GrowArray(void * array, int newSize );
static void FreeArray(void * array );
#endif
#if U_OVERRIDE_CXX_ALLOCATION
/**
* Override for ICU4C C++ memory management.
* simple, non-class types are allocated using the macros in common/cmemory.h
* (uprv_malloc(), uprv_free(), uprv_realloc());
* they or something else could be used here to implement C++ new/delete
* for ICU4C C++ classes
*
* Declared U_NOEXCEPT; see the U_NO_THROW documentation for why the
* non-throwing specification matters for operator new.
* @stable ICU 2.4
*/
static void * U_EXPORT2 operator new(size_t size) U_NOEXCEPT;
/**
* Override for ICU4C C++ memory management.
* Array form of operator new.
* See new().
* @stable ICU 2.4
*/
static void * U_EXPORT2 operator new[](size_t size) U_NOEXCEPT;
/**
* Override for ICU4C C++ memory management.
* simple, non-class types are allocated using the macros in common/cmemory.h
* (uprv_malloc(), uprv_free(), uprv_realloc());
* they or something else could be used here to implement C++ new/delete
* for ICU4C C++ classes
* @stable ICU 2.4
*/
static void U_EXPORT2 operator delete(void *p) U_NOEXCEPT;
/**
* Override for ICU4C C++ memory management.
* Array form of operator delete.
* See delete().
* @stable ICU 2.4
*/
static void U_EXPORT2 operator delete[](void *p) U_NOEXCEPT;
#if U_HAVE_PLACEMENT_NEW
/**
* Override for ICU4C C++ memory management for STL.
* Placement form: defined inline and returns ptr unchanged.
* See new().
* @stable ICU 2.6
*/
static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NOEXCEPT { return ptr; }
/**
* Override for ICU4C C++ memory management for STL.
* Placement form matching the placement new above; the inline body is empty.
* See delete().
* @stable ICU 2.6
*/
static inline void U_EXPORT2 operator delete(void *, void *) U_NOEXCEPT {}
#endif /* U_HAVE_PLACEMENT_NEW */
#if U_HAVE_DEBUG_LOCATION_NEW
/**
* This method overrides the MFC debug version of the operator new
*
* @param size The requested memory size
* @param file The file where the allocation was requested
* @param line The line where the allocation was requested
*/
static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NOEXCEPT;
/**
* This method provides a matching delete for the MFC debug new
*
* @param p The pointer to the allocated memory
* @param file The file where the allocation was requested
* @param line The line where the allocation was requested
*/
static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NOEXCEPT;
#endif /* U_HAVE_DEBUG_LOCATION_NEW */
#endif /* U_OVERRIDE_CXX_ALLOCATION */
/*
* Assignment operator not declared. The compiler will provide one
* which does nothing since this class does not contain any data members.
* API/code coverage may show the assignment operator as present and
* untested - ignore.
* Subclasses need this assignment operator if they use compiler-provided
* assignment operators of their own. An alternative to not declaring one
* here would be to declare and empty-implement a protected or public one.
UMemory &UMemory::operator=(const UMemory &);
*/
};
/**
* UObject is the common ICU "boilerplate" class.
* UObject inherits UMemory (starting with ICU 2.4),
* and all other public ICU C++ classes
* are derived from UObject (starting with ICU 2.2).
*
* UObject contains common virtual functions, in particular a virtual destructor.
*
* The clone() function is not available in UObject because it is not
* implemented by all ICU classes.
* Many ICU services provide a clone() function for their class trees,
* defined on the service's C++ base class
* (which itself is a subclass of UObject).
*
* The only members actually declared here are the virtual destructor and
* getDynamicClassID(); the remaining "boilerplate" candidates in the protected
* section are either commented out or enclosed in an #if 0 region.
*
* @stable ICU 2.2
*/
class U_COMMON_API UObject : public UMemory {
public:
/**
* Destructor.
* Declared virtual, and only declared here (no inline body).
*
* @stable ICU 2.2
*/
virtual ~UObject();
/**
* ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
* The base class implementation returns a dummy value.
*
* Use compiler RTTI rather than ICU's "poor man's RTTI".
* Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
*
* @return a UClassID value; see the UClassID documentation for how
* subclasses are expected to implement this.
* @stable ICU 2.2
*/
virtual UClassID getDynamicClassID() const;
protected:
// the following functions are protected to prevent instantiation and
// direct use of UObject itself
// default constructor
// inline UObject() {}
// copy constructor
// inline UObject(const UObject &other) {}
// Everything between "#if 0" and "#endif" below is excluded from compilation;
// it is kept only as a record of possible future API.
#if 0
// TODO Sometime in the future. Implement operator==().
// (This comment inserted in 2.2)
// some or all of the following "boilerplate" functions may be made public
// in a future ICU4C release when all subclasses implement them
// assignment operator
// (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
// commented out because the implementation is the same as a compiler's default
// UObject &operator=(const UObject &other) { return *this; }
// comparison operators
virtual inline UBool operator==(const UObject &other) const { return this==&other; }
inline UBool operator!=(const UObject &other) const { return !operator==(other); }
// clone() commented out from the base class:
// some compilers do not support co-variant return types
// (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
// see also UObject class documentation.
// virtual UObject *clone() const;
#endif
/*
* Assignment operator not declared. The compiler will provide one
* which does nothing since this class does not contain any data members.
* API/code coverage may show the assignment operator as present and
* untested - ignore.
* Subclasses need this assignment operator if they use compiler-provided
* assignment operators of their own. An alternative to not declaring one
* here would be to declare and empty-implement a protected or public one.
UObject &UObject::operator=(const UObject &);
*/
};
#ifndef U_HIDE_INTERNAL_API
/**
 * Convenience macro that supplies the ICU RTTI member functions for a
 * concrete ICU class: getStaticClassID(), which hands out the address of a
 * function-local static char, and getDynamicClassID(), which forwards to it.
 * Use it in *.cpp files only; it does not belong in a header.
 *
 * @param myClass The name of the class that needs RTTI defined.
 * @internal
 */
#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
    UClassID U_EXPORT2 myClass::getStaticClassID() { \
        static char classID = 0; \
        return static_cast<UClassID>(&classID); \
    } \
    UClassID myClass::getDynamicClassID() const { \
        return myClass::getStaticClassID(); \
    }
/**
 * Variant of the RTTI helper for abstract ICU classes: it defines only
 * getStaticClassID(), which hands out the address of a function-local
 * static char. Invoke it in *.cpp files; the corresponding header should
 * declare getStaticClassID.
 *
 * @param myClass The name of the class that needs RTTI defined.
 * @internal
 */
#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
    UClassID U_EXPORT2 myClass::getStaticClassID() { \
        static char classID = 0; \
        return static_cast<UClassID>(&classID); \
    }
#endif /* U_HIDE_INTERNAL_API */
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,734 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf16.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep09
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 16-bit Unicode handling macros
*
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (https://unicode-org.github.io/icu/userguide/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF16_H__
#define __UTF16_H__
#include <stdbool.h>
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* single-code point definitions -------------------------------------------- */
/**
 * Tests whether one code unit is a complete code point on its own,
 * that is, a BMP value that is not a surrogate.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
/**
 * Tests whether a code unit is a lead surrogate (U+d800..U+dbff).
 * All bits above the low ten are compared against 0xd800.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
/**
 * Tests whether a code unit is a trail surrogate (U+dc00..U+dfff).
 * All bits above the low ten are compared against 0xdc00.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
/**
 * Tests whether a code unit is any surrogate (U+d800..U+dfff).
 * This is a thin alias for U_IS_SURROGATE.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
/**
 * Given that c is already known to be a surrogate code point
 * (U16_IS_SURROGATE(c)), tells whether it is a lead surrogate.
 * Only bit 0x400 is inspected, so the answer is meaningless for
 * non-surrogate input.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
/**
 * Given that c is already known to be a surrogate code point
 * (U16_IS_SURROGATE(c)), tells whether it is a trail surrogate.
 * Only bit 0x400 is inspected, so the answer is meaningless for
 * non-surrogate input.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
/**
 * Helper constant for U16_GET_SUPPLEMENTARY: the combined surrogate bases
 * (0xd800 shifted left by ten, plus 0xdc00) less 0x10000, so that one
 * subtraction turns a shifted lead plus a trail into the code point.
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)-0x10000+0xdc00)
/**
 * Combines a lead and a trail surrogate into the supplementary code point
 * (U+10000..U+10ffff) they encode.
 * The result is undefined if the input values are not
 * lead and trail surrogates.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((UChar32)(trail)+((UChar32)(lead)<<10UL)-U16_SURROGATE_OFFSET)
/**
* Get the lead surrogate (0xd800..0xdbff) for a
* supplementary code point (0x10000..0x10ffff).
* @param supplementary 32-bit code point (U+10000..U+10ffff)
* @return lead surrogate (U+d800..U+dbff) for supplementary
* @stable ICU 2.4
*/
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
/**
* Get the trail surrogate (0xdc00..0xdfff) for a
* supplementary code point (0x10000..0x10ffff).
* @param supplementary 32-bit code point (U+10000..U+10ffff)
* @return trail surrogate (U+dc00..U+dfff) for supplementary
* @stable ICU 2.4
*/
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
/**
 * Number of 16-bit code units needed to encode a Unicode code point (1 or 2).
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
/**
* The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
* @return 2
* @stable ICU 2.4
*/
#define U16_MAX_LENGTH 2
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
* The result is undefined if the offset points to a single, unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* Note: no boundary check is made before reading (s)[(i)+1] or (s)[(i)-1].
* The arguments s, i and c are each expanded more than once, so they
* should be free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_GET
* @stable ICU 2.4
*/
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
if(U16_IS_SURROGATE_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
} else { \
(c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
* (The forward boundary test is (i)+1!=(length); when that does not stop the
* lookup, the following unit is read and only used if it is a trail surrogate.)
*
* If the offset points to a single, unpaired surrogate, then
* c is set to that unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* Note: the macro declares a block-local uint16_t named __c2 for the adjacent
* code unit. The arguments s, i and c are each expanded more than once, so
* they should be free of side effects.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 2.4
*/
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
* (The forward boundary test is (i)+1!=(length); when that does not stop the
* lookup, the following unit is read and only used if it is a trail surrogate.)
*
* If the offset points to a single, unpaired surrogate, then
* c is set to U+FFFD.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
*
* Note: the macro declares a block-local uint16_t named __c2 for the adjacent
* code unit. The arguments s, i and c are each expanded more than once, so
* they should be free of side effects.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 60
*/
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset points to a single, unpaired lead surrogate.
*
* Note: i is post-incremented once for every code unit consumed (once or
* twice), so it must be a modifiable lvalue. The arguments s, i and c are
* expanded more than once and should be free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_NEXT
* @stable ICU 2.4
*/
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
* (The boundary test after reading a lead surrogate is (i)!=(length); when that
* does not stop the lookup, the next unit is read and only consumed if it is
* a trail surrogate.)
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
*
* Note: the macro declares a block-local uint16_t named __c2. i must be a
* modifiable lvalue; the arguments s, i, length and c may be expanded more
* than once and should be free of side effects.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
uint16_t __c2; \
if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
* (The boundary test after reading a lead surrogate is (i)!=(length); when that
* does not stop the lookup, the next unit is read and only consumed if it is
* a trail surrogate.)
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to U+FFFD.
* In that case the offset has still been advanced by one code unit.
*
* Note: the macro declares a block-local uint16_t named __c2. i must be a
* modifiable lvalue; the arguments s, i, length and c may be expanded more
* than once and should be free of side effects.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 60
*/
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* A value up to 0xffff is stored as one unit; anything larger is stored as
* two units computed the same way as U16_LEAD and U16_TRAIL. Each stored
* unit is cast to uint16_t.
*
* Note: s is written through, so it must point to modifiable storage, and i
* must be a modifiable lvalue. The arguments are expanded more than once and
* should be free of side effects.
*
* @param s UChar * string buffer (written to)
* @param i string offset
* @param c code point to append
* @see U16_APPEND
* @stable ICU 2.4
*/
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a surrogate pair is written, checks for sufficient space in the string.
* If the code point is not valid or a trail surrogate does not fit,
* then isError is set to true.
*
* Details visible in the implementation:
* - Any value up to 0xffff takes the single-unit branch and is stored as is;
*   that branch performs neither a surrogate check nor a capacity check,
*   which is why i<capacity is a precondition.
* - The validity check is (uint32_t)(c)<=0x10ffff, so negative values are
*   reported as errors as well.
* - On error nothing is written and i is not advanced.
*
* Note: s is written through, so it must point to modifiable storage, and i
* must be a modifiable lvalue. The arguments are expanded more than once and
* should be free of side effects.
*
* @param s UChar * string buffer (written to)
* @param i string offset, must be i<capacity
* @param capacity size of the string buffer
* @param c code point to append
* @param isError output UBool set to true if an error occurs, otherwise not modified
* @see U16_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} else /* c>0x10ffff or not enough space */ { \
(isError)=true; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset moves by two units when the unit at i is a lead surrogate
* (the following unit is not inspected), and by one unit otherwise.
* i must be a modifiable lvalue and is expanded more than once.
*
* @param s const UChar * string
* @param i string offset
* @see U16_FWD_1
* @stable ICU 2.4
*/
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset always moves by at least one unit. It moves by a second unit
* only if the first unit is a lead surrogate, the incremented offset differs
* from length, and the unit there is a trail surrogate.
* i must be a modifiable lvalue; s, i and length may be expanded more than once.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @see U16_FWD_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_FWD_N
* @stable ICU 2.4
*/
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0) { \
U16_FWD_1_UNSAFE(s, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U16_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
U16_FWD_1(s, i, length); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* Only the unit at i is inspected; the preceding unit is not checked.
* i must be a modifiable lvalue and is expanded more than once.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_START
* @stable ICU 2.4
*/
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* Note: the unit at i is read before any boundary test, so i must index a
* readable code unit. The preceding unit is read only when i>start.
* i must be a modifiable lvalue; s, start and i may be expanded more than once.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i
* @see U16_SET_CP_START_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind a single, unpaired trail surrogate.
*
* Note: i is pre-decremented once for every code unit consumed (once or
* twice), with no check against the start of the string. i must be a
* modifiable lvalue; s, i and c are expanded more than once and should be
* free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_PREV
* @stable ICU 2.4
*/
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
(c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to that unpaired surrogate.
*
* Note: the macro declares a block-local uint16_t named __c2. The unit
* before a trail surrogate is read only when the decremented offset is still
* greater than start. i must be a modifiable lvalue; the arguments may be
* expanded more than once and should be free of side effects.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
uint16_t __c2; \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to U+FFFD.
* In that case the offset has still been moved back by one code unit.
*
* Note: the macro declares a block-local uint16_t named __c2. The unit
* before a trail surrogate is read only when the decremented offset is still
* greater than start. i must be a modifiable lvalue; the arguments may be
* expanded more than once and should be free of side effects.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 60
*/
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset moves back by two units when the unit before it is a trail
* surrogate (the unit before that is not inspected), and by one unit otherwise.
* i must be a modifiable lvalue and is expanded more than once.
*
* @param s const UChar * string
* @param i string offset
* @see U16_BACK_1
* @stable ICU 2.4
*/
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[--(i)])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset always moves back by at least one unit. It moves back a second
* unit only if the first unit is a trail surrogate, the decremented offset is
* still greater than start, and the unit before it is a lead surrogate.
* i must be a modifiable lvalue; s, start and i may be expanded more than once.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @see U16_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_BACK_N
* @stable ICU 2.4
*/
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0) { \
U16_BACK_1_UNSAFE(s, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start start of string
* @param i string offset, must be start<i
* @param n number of code points to skip
* @see U16_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0 && (i)>(start)) { \
U16_BACK_1(s, start, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* Note: the unit at (i)-1 is read unconditionally, so there must be a
* readable code unit before the offset. Only that unit is inspected; the
* unit at i is not checked. i must be a modifiable lvalue and is expanded
* more than once.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_LIMIT
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)-1])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, start<=i<=length
* @param length int32_t string length
* @see U16_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* only look at s[i-1] and s[i] when both lie inside the string */ \
    if((start)<(i)) { \
        if((i)<(length) || (length)<0) { \
            /* adjust only for a well-formed pair split by the offset */ \
            if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
#endif

View File

@@ -0,0 +1,882 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf8.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 8-bit Unicode handling macros
*
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (https://unicode-org.github.io/icu/userguide/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF8_H__
#define __UTF8_H__
#include <stdbool.h>
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* internal definitions ----------------------------------------------------- */
/**
* Counts the trail bytes for a UTF-8 lead byte.
* Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (!U8_IS_LEAD(leadByte) ? 0 : \
        1+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
/**
* Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
* Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    (((uint8_t)(leadByte)>=0xf0)+ \
     ((uint8_t)(leadByte)>=0xe0)+ \
     ((uint8_t)(leadByte)>=0xc2))
/**
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
* The mask keeps the low (6-countTrailBytes) bits and is applied with a compound
* assignment, so leadByte is modified in place and must be a modifiable lvalue.
* The value of the macro expression is the masked lead byte.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte variable holding the lead byte; overwritten with the masked value
* @param countTrailBytes number of trail bytes belonging to the sequence;
*        the shift 6-countTrailBytes is only meaningful for values up to 6
* @internal
*/
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/**
* Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* Lead byte E0..EF bits 3..0 are used as byte index,
* first trail byte bits 7..5 are used as bit index into that byte.
*
* Reading the table with that indexing:
* - entry 0 (lead E0) is 0x20: only bit 5 is set, i.e. first trail byte A0..BF
* - entry 0xD (lead ED) is 0x10: only bit 4 is set, i.e. first trail byte 80..9F
* - every other entry is 0x30: bits 4 and 5 are set, i.e. first trail byte 80..BF
* Bits 0..3 and 6..7 are never set, so any byte outside 80..BF is rejected.
*
* @see U8_IS_VALID_LEAD3_AND_T1
* @internal
*/
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
/**
* Internal 3-byte UTF-8 validity check.
* Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) \
    ((1<<((uint8_t)(t1)>>5))&U8_LEAD3_T1_BITS[(lead)&0xf])
/**
* Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* First trail byte bits 7..4 are used as byte index,
* lead byte F0..F4 bits 2..0 are used as bit index into that byte.
*
* Reading the table with that indexing:
* - entry 8 (first trail byte 80..8F) is 0x1E: bits 1..4 are set, i.e. lead F1..F4
* - entries 9..0xB (first trail byte 90..BF) are 0x0F: bits 0..3 are set, i.e. lead F0..F3
* - all other entries are 0, so a first trail byte outside 80..BF is always rejected
*
* @see U8_IS_VALID_LEAD4_AND_T1
* @internal
*/
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
/**
* Internal 4-byte UTF-8 validity check.
* Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) \
    ((1<<((lead)&7))&U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4])
/**
* Function for handling "next code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
*
* NOTE(review): no macro in this header references this function any more
* (U8_NEXT and U8_NEXT_OR_FFFD expand to U8_INTERNAL_NEXT_OR_SUB);
* presumably the declaration is kept for code compiled against older macros - confirm.
* The parameter meanings are not visible from this header; presumably they mirror
* the s/i/length/c arguments of U8_NEXT - verify against the implementation.
* @internal
*/
U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
/**
* Function for handling "append code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
*
* NOTE(review): no macro in this header references this function any more
* (U8_APPEND writes the bytes inline);
* presumably the declaration is kept for code compiled against older macros - confirm.
* The parameter meanings are not visible from this header; presumably they mirror
* the s/i/capacity/c/isError arguments of U8_APPEND - verify against the implementation.
* @internal
*/
U_CAPI int32_t U_EXPORT2
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
/**
* Function for handling "previous code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
*
* Callers in this file: U8_PREV and U8_PREV_OR_FFFD. Both call it only for a
* non-ASCII byte, passing the string, the start offset, the address of the
* already decremented offset, and the byte that was just read as c.
* The return value is stored in the macro's output code point.
* U8_PREV passes -1 and U8_PREV_OR_FFFD passes -3 for strict; presumably these
* select the negative-value and the U+FFFD error result respectively -
* confirm against the implementation.
* @internal
*/
U_CAPI UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
/**
* Function for handling "skip backward one code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
*
* Callers in this file: U8_SET_CP_START and U8_BACK_1. Both call it only when
* s[i] is a trail byte and assign the return value back to the offset,
* so the result is the adjusted string offset.
* @internal
*/
U_CAPI int32_t U_EXPORT2
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
/* single-code point definitions -------------------------------------------- */
/**
* Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
* @param c 8-bit code unit (byte)
* @return true or false
* @stable ICU 2.4
*/
#define U8_IS_SINGLE(c) (!((c)&0x80))
/**
* Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
* @param c 8-bit code unit (byte)
* @return true or false
* @stable ICU 2.4
*/
/* 0x32 is 0xf4-0xc2: after the subtraction one unsigned comparison covers 0xC2..0xF4 */
#define U8_IS_LEAD(c) (0x32>=(uint8_t)((c)-0xc2))
/**
* Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
* @param c 8-bit code unit (byte)
* @return true or false
* @stable ICU 2.4
*/
#define U8_IS_TRAIL(c) (-0x40>(int8_t)(c))
/**
* How many code units (bytes) are used for the UTF-8 encoding
* of this Unicode code point?
* @param c 32-bit code point
* @return 1..4, or 0 if c is a surrogate or not a Unicode code point
* @stable ICU 2.4
*/
#define U8_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : /* ASCII */ \
     (uint32_t)(c)<=0x7ff ? 2 : \
     (uint32_t)(c)<=0xd7ff ? 3 : /* below the surrogate range */ \
     ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff) ? 0 : /* surrogate or beyond U+10FFFF */ \
     (uint32_t)(c)<=0xffff ? 3 : 4)
/**
* The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
* This is the largest value that U8_LENGTH() can yield.
* @return 4
* @stable ICU 2.4
*/
#define U8_MAX_LENGTH 4
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
* The result is undefined if the offset points to an illegal UTF-8
* byte sequence.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_GET
* @stable ICU 2.4
*/
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* operate on a private copy so the caller's offset is left untouched */ \
    int32_t _u8_get_unsafe_pos=(int32_t)(i); \
    /* back up to the lead byte, then decode forward from there */ \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_pos); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_pos, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to a negative value.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_GET_UNSAFE
* @stable ICU 2.4
*/
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* operate on a private copy so the caller's offset is left untouched */ \
    int32_t _u8_get_pos=(i); \
    /* back up to the start of the code point, then decode forward with checks */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to U+FFFD.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_GET() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_GET
* @stable ICU 51
*/
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* operate on a private copy so the caller's offset is left untouched */ \
    int32_t _u8_get_pos=(i); \
    /* back up to the start of the code point, then decode with U+FFFD substitution */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT_OR_FFFD(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* The result is undefined if the offset points to a trail byte
* or an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_NEXT
* @stable ICU 2.4
*/
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
if((c)<0xe0) { \
(c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
} else if((c)<0xf0) { \
/* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
(c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
(i)+=2; \
} else { \
(c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
(i)+=3; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to a negative value.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
*/
/* Forwards to the shared safe decoder; U_SENTINEL is the value stored in c for ill-formed input. */
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_NEXT() if that distinction is important.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_NEXT
* @stable ICU 51
*/
/* Forwards to the shared safe decoder; 0xfffd is the value stored in c for ill-formed input. */
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
/**
* Shared implementation of U8_NEXT and U8_NEXT_OR_FFFD.
* Reads the byte at i into c and post-increments i. For a non-ASCII byte the
* rest of the sequence is fetched, validated and assembled inside one
* short-circuit condition, so the order of the sub-expressions is significant.
* If any check fails, c is set to sub.
*
* The offset is never moved backward: after an ill-formed sequence it stays
* just past the bytes that were accepted.
* Every bounds test has the form (i)!=(length), so a negative length never
* stops the scan; a terminating NUL byte instead fails the trail byte checks.
*
* @param s const uint8_t * string
* @param i int32_t string offset, advanced by the macro
* @param length int32_t string length, or negative for a NUL-terminated string
* @param c output UChar32 variable; also used as scratch while decoding
* @param sub value assigned to c for an ill-formed sequence
* @internal
*/
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t = 0; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
(__t&=0x3f, 1) \
: /* U+10000..U+10FFFF */ \
((c)-=0xf0)<=4 && \
U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
(__t=(s)[i]-0x80)<=0x3f) && \
/* valid second-to-last trail byte */ \
((c)=((c)<<6)|__t, ++(i)!=(length)) \
: /* U+0080..U+07FF */ \
(c)>=0xc2 && ((c)&=0x1f, 1)) && \
/* last trail byte */ \
(__t=(s)[i]-0x80)<=0x3f && \
((c)=((c)<<6)|__t, ++(i), 1)) { \
} else { \
(c)=(sub); /* ill-formed*/ \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* @param s uint8_t * string buffer (written to, so it must not be const)
* @param i string offset
* @param c code point to append
* @see U8_APPEND
* @stable ICU 2.4
*/
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __cp=(c); \
    if(__cp<=0x7f) { \
        /* ASCII: one byte */ \
        (s)[(i)++]=(uint8_t)__cp; \
    } else if(__cp<=0x7ff) { \
        /* two bytes: 110xxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)(0xc0|(__cp>>6)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else if(__cp<=0xffff) { \
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)(0xe0|(__cp>>12)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else { \
        /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)(0xf0|(__cp>>18)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>12)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a non-ASCII code point is written, checks for sufficient space in the string.
* If the code point is not valid or trail bytes do not fit,
* then isError is set to true.
*
* @param s uint8_t * string buffer (written to, so it must not be const)
* @param i int32_t string offset, must be i<capacity
* @param capacity int32_t size of the string buffer
* @param c UChar32 code point to append
* @param isError output UBool set to true if an error occurs, otherwise not modified
* @see U8_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __cp=(c); \
    if(__cp<=0x7f) { \
        /* ASCII: one byte, written without a capacity test */ \
        (s)[(i)++]=(uint8_t)__cp; \
    } else if(__cp<=0x7ff && (i)+1<(capacity)) { \
        /* two bytes: 110xxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)(0xc0|(__cp>>6)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else if((__cp<=0xd7ff || (__cp>=0xe000 && __cp<=0xffff)) && (i)+2<(capacity)) { \
        /* three bytes; the surrogate range D800..DFFF is excluded */ \
        (s)[(i)++]=(uint8_t)(0xe0|(__cp>>12)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else if(__cp>0xffff && __cp<=0x10ffff && (i)+3<(capacity)) { \
        /* four bytes: supplementary code points up to U+10FFFF */ \
        (s)[(i)++]=(uint8_t)(0xf0|(__cp>>18)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>12)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|((__cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(__cp&0x3f)); \
    } else { \
        /* surrogate, value above U+10FFFF, or the bytes do not fit: nothing is written */ \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_FWD_1
* @stable ICU 2.4
*/
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* skip the lead byte plus however many trail bytes it announces */ \
    (i)+=U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i])+1; \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @see U8_FWD_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __lead=(s)[(i)++]; \
    /* anything that is not a lead byte, or a lead byte at the very end, counts as one unit */ \
    if(U8_IS_LEAD(__lead) && (i)!=(length)) { \
        uint8_t __next=(s)[i]; \
        if(__lead<0xe0) { \
            /* two-byte sequence: one trail byte expected */ \
            if(U8_IS_TRAIL(__next)) { \
                ++(i); \
            } \
        } else if(__lead<0xf0) { \
            /* three-byte sequence: lead/first-trail pair is range-checked */ \
            if(U8_IS_VALID_LEAD3_AND_T1(__lead, __next) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } else /* __lead>=0xf0 */ { \
            /* four-byte sequence: lead/first-trail pair is range-checked */ \
            if(U8_IS_VALID_LEAD4_AND_T1(__lead, __next) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_FWD_N
* @stable ICU 2.4
*/
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* n is captured once; each iteration skips one code point */ \
    for(int32_t __left=(n); __left>0; --__left) { \
        U8_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U8_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* stop at the string limit, or at the NUL when length is negative */ \
    for(int32_t __left=(n); \
            __left>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); \
            --__left) { \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_START
* @stable ICU 2.4
*/
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* walk backward while the offset still points at a trail byte */ \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
* @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(!U8_IS_TRAIL((s)[(i)])) { \
        /* not on a trail byte: the offset is left as it is */ \
    } else { \
        /* let the checked helper find the matching lead byte */ \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* If the string ends with a UTF-8 byte sequence that is valid so far
* but incomplete, then reduce the length of the string to end before
* the lead byte of that incomplete sequence.
* For example, if the string ends with E1 80, the length is reduced by 2.
*
* In all other cases (the string ends with a complete sequence, or it is not
* possible for any further trail byte to extend the trailing sequence)
* the length remains unchanged.
*
* Useful for processing text split across multiple buffers
* (save the incomplete sequence for later)
* and for optimizing iteration
* (check for string length only once per character).
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_SET_CP_START(), this macro never reads s[length].
*
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param length int32_t string length (usually start<=length)
* @see U8_SET_CP_START
* @stable ICU 61
*/
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* s is parenthesized at every use so that an expression argument */ \
    /* such as buf+offset indexes the intended element. */ \
    if((length)>(start)) { \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lead byte with no trail byte yet: drop it */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead followed by one trail byte: incomplete if the pair is valid */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                /* two trail bytes: only a 4-byte lead can still be incomplete */ \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_PREV
* @stable ICU 2.4
*/
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __unit, __trails=1, __bits=6; \
        \
        /* c holds the last trail byte: keep its 6 payload bits */ \
        (c)&=0x3f; \
        /* collect further trail bytes until a byte >=0xc0 shows up */ \
        while((__unit=(s)[--(i)])<0xc0) { \
            (c)|=(UChar32)(__unit&0x3f)<<__bits; \
            ++__trails; \
            __bits+=6; \
        } \
        /* __unit is the lead byte: strip its length marker and add its payload on top */ \
        U8_MASK_LEAD_BYTE(__unit, __trails); \
        (c)|=(UChar32)__unit<<__bits; \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* read the byte before the offset; ASCII is returned as is */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* (s) is parenthesized so the cast applies to the whole argument expression. */ \
        /* -1 is the strict argument this macro has always passed; presumably it selects */ \
        /* the negative error result described for this macro - confirm in the implementation. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_PREV() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_PREV
* @stable ICU 51
*/
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* read the byte before the offset; ASCII is returned as is */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* (s) is parenthesized so the cast applies to the whole argument expression. */ \
        /* -3 is the strict argument this macro has always passed; presumably it selects */ \
        /* the U+FFFD error result described for this macro - confirm in the implementation. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_BACK_1
* @stable ICU 2.4
*/
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* always step back once, then keep going while on a trail byte */ \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @see U8_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* step back one byte first */ \
    --(i); \
    if(U8_IS_TRAIL((s)[i])) { \
        /* landed inside a sequence: let the checked helper find its start */ \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_BACK_N
* @stable ICU 2.4
*/
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* n is captured once; each iteration steps back over one code point */ \
    for(int32_t __left=(n); __left>0; --__left) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t index of the start of the string
* @param i int32_t string offset, must be start<i
* @param n number of code points to skip
* @see U8_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* stop early once the offset has reached the start of the string */ \
    for(int32_t __left=(n); __left>0 && (i)>(start); --__left) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind a partial multi-byte sequence,
* then the offset is incremented to behind the whole sequence.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_LIMIT
* @stable ICU 2.4
*/
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
U8_BACK_1_UNSAFE(s, i); \
U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind a partial multi-byte sequence,
* then the offset is incremented to behind the whole sequence.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i<=length
* @param length int32_t string length
* @see U8_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* offsets at the start or at the limit of the string are boundaries already */ \
    if((start)<(i)) { \
        if((i)<(length) || (length)<0) { \
            /* go to the start of the preceding code point, then past its end */ \
            U8_BACK_1(s, start, i); \
            U8_FWD_1(s, i, length); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
#endif

View File

@@ -0,0 +1,732 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1996-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* FILE NAME : UTYPES.H (formerly ptypes.h)
*
* Date Name Description
* 12/11/96 helena Creation.
* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
* uint8, uint16, and uint32.
* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
* well as C++.
* Modified to use memcpy() for uprv_arrayCopy() fns.
* 04/14/97 aliu Added TPlatformUtilities.
* 05/07/97 aliu Added import/export specifiers (replacing the old
* broken EXT_CLASS). Added version number for our
* code. Cleaned up header.
* 6/20/97 helena Java class name change.
* 08/11/98 stephen UErrorCode changed from typedef to enum
* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
* 04/20/99 stephen Cleaned up & reworked for autoconf.
* Renamed to utypes.h.
* 05/05/99 stephen Changed to use <inttypes.h>
* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
*******************************************************************************
*/
#ifndef UTYPES_H
#define UTYPES_H
#include "unicode/umachine.h"
#include "unicode/uversion.h"
#include "unicode/uconfig.h"
#include <float.h>
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
# include "unicode/utf.h"
#endif
/*!
* \file
* \brief Basic definitions for ICU, for both C and C++ APIs
*
* This file defines basic types, constants, and enumerations directly or
* indirectly by including other header files, especially utf.h for the
* basic character and string definitions and umachine.h for consistent
* integer and other types.
*/
/**
* \def U_SHOW_CPLUSPLUS_API
* When compiling as C++ this is 1 unless the build has already defined it,
* in which case that value is kept.
* When compiling as C any earlier definition is discarded and the value is forced to 0.
* Presumably this gates the C++ API declarations in other ICU headers - confirm there.
* @internal
*/
#ifdef __cplusplus
# ifndef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 1
# endif
#else
# undef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 0
#endif
/** @{ API visibility control */
/**
* \def U_HIDE_DRAFT_API
* Define this to 1 to request that draft API be "hidden"
* @internal
*/
/**
* \def U_HIDE_INTERNAL_API
* Define this to 1 to request that internal API be "hidden"
* @internal
*/
/* Draft API is hidden unless U_DEFAULT_SHOW_DRAFT is non-zero or the build defines U_SHOW_DRAFT_API. */
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
#define U_HIDE_DRAFT_API 1
#endif
/* Internal API is keyed off the same U_DEFAULT_SHOW_DRAFT switch; the per-build override is U_SHOW_INTERNAL_API. */
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
#define U_HIDE_INTERNAL_API 1
#endif
/** @} */
/*===========================================================================*/
/* ICUDATA naming scheme */
/*===========================================================================*/
/**
* \def U_ICUDATA_TYPE_LETTER
*
* This is a platform-dependent string containing one letter:
* - b for big-endian, ASCII-family platforms
* - l for little-endian, ASCII-family platforms
* - e for big-endian, EBCDIC-family platforms
* This letter is part of the common data file name.
* @stable ICU 2.0
*/
/**
* \def U_ICUDATA_TYPE_LITLETTER
* The non-string form of U_ICUDATA_TYPE_LETTER
* @stable ICU 2.0
*/
/*
* Select the data-file type letter from the charset family and endianness:
* a non-zero U_CHARSET_FAMILY takes the EBCDIC branch, zero takes the ASCII one.
*/
#if U_CHARSET_FAMILY
# if U_IS_BIG_ENDIAN
/* EBCDIC - should always be BE */
# define U_ICUDATA_TYPE_LETTER "e"
# define U_ICUDATA_TYPE_LITLETTER e
# else
/* Unsupported combination: the build is rejected by the #error below. */
# error "Don't know what to do with little endian EBCDIC!"
/* NOTE(review): presumably defined only so later uses do not add further diagnostics - confirm. */
# define U_ICUDATA_TYPE_LETTER "x"
# define U_ICUDATA_TYPE_LITLETTER x
# endif
#else
# if U_IS_BIG_ENDIAN
/* Big-endian ASCII */
# define U_ICUDATA_TYPE_LETTER "b"
# define U_ICUDATA_TYPE_LITLETTER b
# else
/* Little-endian ASCII */
# define U_ICUDATA_TYPE_LETTER "l"
# define U_ICUDATA_TYPE_LITLETTER l
# endif
#endif
/**
* A single string literal containing the icudata stub name. i.e. 'icudt18e' for
* ICU 1.8.x on EBCDIC, etc..
* @stable ICU 2.0
*/
#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
#ifndef U_HIDE_INTERNAL_API
#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
#define U_USE_USRDATA 0 /**< @internal */
#endif /* U_HIDE_INTERNAL_API */
/**
* U_ICUDATA_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
* Defined as a literal, not a string.
* Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
* from the corresponding macro invocation, _before_ other macro substitutions.
* Nested \#defines are needed to get the actual version numbers rather than
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
* The net result will be something of the form
* \#define U_ICUDATA_ENTRY_POINT icudt19_dat
* @stable ICU 2.4
*/
#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
#ifndef U_HIDE_INTERNAL_API
/**
* Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
* @internal
*/
#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
/**
* Do not use.
* @internal
*/
#ifndef U_DEF_ICUDATA_ENTRY_POINT
/* affected by symbol renaming. See platform.h */
#ifndef U_LIB_SUFFIX_C_NAME
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
#else
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
#endif
#endif
#endif /* U_HIDE_INTERNAL_API */
/**
* \def NULL
* Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
* @stable ICU 2.0
*/
#ifndef NULL
#ifdef __cplusplus
#define NULL nullptr
#else
#define NULL ((void *)0)
#endif
#endif
/*===========================================================================*/
/* Calendar/TimeZone data types */
/*===========================================================================*/
/**
* Date and Time data type.
* This is a primitive data type that holds the date and time
* as the number of milliseconds since 1970-jan-01, 00:00 UTC.
* UTC leap seconds are ignored.
* @stable ICU 2.0
*/
typedef double UDate;
/** The number of milliseconds per second @stable ICU 2.0 */
#define U_MILLIS_PER_SECOND (1000)
/** The number of milliseconds per minute @stable ICU 2.0 */
#define U_MILLIS_PER_MINUTE (60000)
/** The number of milliseconds per hour @stable ICU 2.0 */
#define U_MILLIS_PER_HOUR (3600000)
/** The number of milliseconds per day @stable ICU 2.0 */
#define U_MILLIS_PER_DAY (86400000)
/**
* Maximum UDate value
* @stable ICU 4.8
*/
#define U_DATE_MAX DBL_MAX
/**
* Minimum UDate value
* @stable ICU 4.8
*/
#define U_DATE_MIN -U_DATE_MAX
/*===========================================================================*/
/* Shared library/DLL import-export API control */
/*===========================================================================*/
/*
* Control of symbol import/export.
* ICU is separated into three libraries.
*/
/**
* \def U_COMBINED_IMPLEMENTATION
* Set to export library symbols from inside the ICU library
* when all of ICU is in a single library.
* This can be set as a compiler option while building ICU, and it
* needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
* @stable ICU 2.0
*/
/**
* \def U_DATA_API
* Set to export library symbols from inside the stubdata library,
* and to import them from outside.
* @stable ICU 3.0
*/
/**
* \def U_COMMON_API
* Set to export library symbols from inside the common library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_I18N_API
* Set to export library symbols from inside the i18n library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_LAYOUT_API
* Set to export library symbols from inside the layout engine library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_LAYOUTEX_API
* Set to export library symbols from inside the layout extensions library,
* and to import them from outside.
* @stable ICU 2.6
*/
/**
* \def U_IO_API
* Set to export library symbols from inside the ustdio library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_TOOLUTIL_API
* Set to export library symbols from inside the toolutil library,
* and to import them from outside.
* @stable ICU 3.4
*/
#ifdef U_IN_DOXYGEN
// This definition is required when generating the API docs.
#define U_COMBINED_IMPLEMENTATION 1
#endif
/*
* Exactly one branch of the ladder below defines all seven U_*_API macros.
* - U_COMBINED_IMPLEMENTATION: every macro is U_EXPORT.
* - U_STATIC_IMPLEMENTATION: every macro expands to nothing.
* - U_<LIB>_IMPLEMENTATION: that library's macro is U_EXPORT, the rest U_IMPORT.
* - none of the above: every macro is U_IMPORT.
* The combined case is tested first so that it overrides any per-library
* U_<LIB>_IMPLEMENTATION definition.
*/
#if defined(U_COMBINED_IMPLEMENTATION)
#define U_DATA_API U_EXPORT
#define U_COMMON_API U_EXPORT
#define U_I18N_API U_EXPORT
#define U_LAYOUT_API U_EXPORT
#define U_LAYOUTEX_API U_EXPORT
#define U_IO_API U_EXPORT
#define U_TOOLUTIL_API U_EXPORT
#elif defined(U_STATIC_IMPLEMENTATION)
/* Static build: no import/export decoration at all. */
#define U_DATA_API
#define U_COMMON_API
#define U_I18N_API
#define U_LAYOUT_API
#define U_LAYOUTEX_API
#define U_IO_API
#define U_TOOLUTIL_API
#elif defined(U_COMMON_IMPLEMENTATION)
/* Building the common library. */
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_EXPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_I18N_IMPLEMENTATION)
/* Building the i18n library. */
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_EXPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUT_IMPLEMENTATION)
/* Building the layout engine library. */
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_EXPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUTEX_IMPLEMENTATION)
/* Building the layout extensions library. */
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_EXPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_IO_IMPLEMENTATION)
/* Building the ustdio library. */
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_EXPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_TOOLUTIL_IMPLEMENTATION)
/* Building the toolutil library. */
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_EXPORT
#else
/* Client code: everything is imported. */
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#endif
/**
* \def U_STANDARD_CPP_NAMESPACE
* Control of C++ Namespace
* @stable ICU 2.0
*/
#ifdef __cplusplus
#define U_STANDARD_CPP_NAMESPACE ::
#else
#define U_STANDARD_CPP_NAMESPACE
#endif
/*===========================================================================*/
/* UErrorCode */
/*===========================================================================*/
/**
* Standard ICU4C error code type, a substitute for exceptions.
*
* Initialize the UErrorCode with U_ZERO_ERROR, and check for success or
* failure using U_SUCCESS() or U_FAILURE():
*
* UErrorCode errorCode = U_ZERO_ERROR;
* // call ICU API that needs an error code parameter.
* if (U_FAILURE(errorCode)) {
* // An error occurred. Handle it here.
* }
*
* C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a
* suitable subclass.
*
* For more information, see:
* http://icu-project.org/userguide/conventions
*
* Note: By convention, ICU functions that take a reference (C++) or a pointer
* (C) to a UErrorCode first test:
*
* if (U_FAILURE(errorCode)) { return immediately; }
*
* so that in a chain of such functions the first one that sets an error code
* causes the following ones to not perform any operations.
*
* @stable ICU 2.0
*/
typedef enum UErrorCode {
/* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
* and is that way because VC++ debugger displays first encountered constant,
* which is not the what the code is used for
*/
U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UErrorCode warning value.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_ERROR_WARNING_LIMIT,
#endif // U_HIDE_DEPRECATED_API
U_ZERO_ERROR = 0, /**< No error, no warning. */
U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
It is very possible that a circular alias definition has occurred */
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
#ifndef U_HIDE_DRAFT_API
/**
* The input is impractically long for an operation.
* It is rejected because it may lead to problems such as excessive
* processing time, stack depth, or heap memory requirements.
*
* @draft ICU 68
*/
U_INPUT_TOO_LONG_ERROR = 31,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest standard error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_STANDARD_ERROR_LIMIT = 32,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10000 0x10100 are reserved for Transliterator.
*/
U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
U_MISSING_OPERATOR, /**< A rule contains no operator */
U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
U_MULTIPLE_CURSORS, /**< More than one cursor */
U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
U_TRAILING_BACKSLASH, /**< A dangling backslash */
U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal Transliterator error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_PARSE_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10100 0x10200 are reserved for the formatting API.
*/
U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_FMT_PARSE_ERROR_LIMIT = 0x10114,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10200 0x102ff are reserved for BreakIterator.
*/
U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal BreakIterator error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_BRK_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors.
*/
U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
#ifndef U_HIDE_DEPRECATED_API
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
#endif /* U_HIDE_DEPRECATED_API */
U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */
U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal regular expression error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes.
*/
U_IDNA_PROHIBITED_ERROR=0x10400,
U_IDNA_ERROR_START=0x10400,
U_IDNA_UNASSIGNED_ERROR,
U_IDNA_CHECK_BIDI_ERROR,
U_IDNA_STD3_ASCII_RULES_ERROR,
U_IDNA_ACE_PREFIX_ERROR,
U_IDNA_VERIFICATION_ERROR,
U_IDNA_LABEL_TOO_LONG_ERROR,
U_IDNA_ZERO_LENGTH_LABEL_ERROR,
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal IDNA error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_IDNA_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/*
* Aliases for StringPrep
*/
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
/*
* Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes.
*/
U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal plug-in error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_PLUGIN_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT
#endif // U_HIDE_DEPRECATED_API
} UErrorCode;
/* Use the following to determine if an UErrorCode represents */
/* operational success or failure. */
#ifdef __cplusplus
/**
* Reports whether an error code denotes success, i.e. whether it is
* U_ZERO_ERROR or a (negative) warning value.
* @stable ICU 2.0
*/
static inline UBool U_SUCCESS(UErrorCode code) {
    return static_cast<UBool>(code <= U_ZERO_ERROR);
}
/**
* Reports whether an error code denotes a failure, i.e. whether it is
* strictly greater than U_ZERO_ERROR.
* @stable ICU 2.0
*/
static inline UBool U_FAILURE(UErrorCode code) {
    return static_cast<UBool>(code > U_ZERO_ERROR);
}
#else
/**
* Reports whether an error code denotes success (U_ZERO_ERROR or a warning).
* @stable ICU 2.0
*/
# define U_SUCCESS(x) ((x) <= U_ZERO_ERROR)
/**
* Reports whether an error code denotes a failure (greater than U_ZERO_ERROR).
* @stable ICU 2.0
*/
# define U_FAILURE(x) ((x) > U_ZERO_ERROR)
#endif
/**
* Return a string for a UErrorCode value.
* The string will be the same as the name of the error code constant
* in the UErrorCode enum above.
* @stable ICU 2.0
*/
U_CAPI const char * U_EXPORT2
u_errorName(UErrorCode code);
#endif /* _UTYPES */

View File

@@ -0,0 +1,198 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2000-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
* file name: uvernum.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* Created by: Vladimir Weinstein
* Updated by: Steven R. Loomis
*
*/
/**
* \file
* \brief C API: definitions of ICU version numbers
*
* This file is included by uversion.h and other files. This file contains only
* macros and definitions. The actual version numbers are defined here.
*/
/*
* IMPORTANT: When updating version, the following things need to be done:
* source/common/unicode/uvernum.h - this file: update major, minor,
* patchlevel, suffix, version, short version constants, namespace,
* renaming macro, and copyright
*
* The following files need to be updated as well, which can be done
* by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
*
*
* source/common/common_uwp.vcxproj
* source/common/common.vcxproj - update 'Output file name' on the link tab so
* that it contains the new major/minor combination
* source/i18n/i18n.vcxproj - same as for the common.vcxproj
* source/i18n/i18n_uwp.vcxproj - same as for the common_uwp.vcxproj
* source/layoutex/layoutex.vcproj - same
* source/stubdata/stubdata.vcproj - same as for the common.vcxproj
* source/io/io.vcproj - same as for the common.vcxproj
* source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
* the new major/minor combination and the Unicode version.
*/
#ifndef UVERNUM_H
#define UVERNUM_H
/** The standard copyright notice that gets compiled into each library.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_COPYRIGHT_STRING \
" Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html "
/** The current ICU major version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 69
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
/** The current ICU build level version as an integer.
* This value is for use by ICU clients. It defaults to 0.
* @stable ICU 4.0
*/
#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
#define U_ICU_VERSION_BUILDLEVEL_NUM 0
#endif
/** Glued version suffix for renamers
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _69
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
* @internal
*/
/**
* \def U_DEF_ICU_ENTRY_POINT_RENAME
* @internal
*/
/** Glued version suffix function for renamers
* This value will change in the subsequent releases of ICU.
* If a custom suffix (such as matching library suffixes) is desired, this can be modified.
* Note that if present, platform.h may contain an earlier definition of this macro.
* \def U_ICU_ENTRY_POINT_RENAME
* @stable ICU 4.2
*/
/**
* Disable the version suffix. Use the custom suffix if exists.
* \def U_DISABLE_VERSION_SUFFIX
* @internal
*/
/* Version suffixing stays enabled unless the build pre-defines this to non-zero. */
#ifndef U_DISABLE_VERSION_SUFFIX
#define U_DISABLE_VERSION_SUFFIX 0
#endif
/*
* U_ICU_ENTRY_POINT_RENAME(x) builds the renamed symbol for x by token pasting.
* An earlier definition of the macro is kept as-is (outer #ifndef).
* The U_DEF2_... -> U_DEF_... indirection lets the suffix macros be expanded
* before the ## operator pastes them onto x.
*/
#ifndef U_ICU_ENTRY_POINT_RENAME
#ifdef U_HAVE_LIB_SUFFIX
# if !U_DISABLE_VERSION_SUFFIX
/* Library suffix and version suffix: x ## U_ICU_VERSION_SUFFIX ## U_LIB_SUFFIX_C_NAME */
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
# else
/* Library suffix only: x ## U_LIB_SUFFIX_C_NAME */
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME)
# endif
#else
# if !U_DISABLE_VERSION_SUFFIX
/* Version suffix only: x ## U_ICU_VERSION_SUFFIX */
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
# else
/* No suffix at all: the symbol name is left unchanged. */
# define U_ICU_ENTRY_POINT_RENAME(x) x
# endif
#endif
#endif
/** The current ICU library version as a dotted-decimal string. The patchlevel
* only appears in this string if it is non-zero.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "69.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
* This value will change in subsequent releases of ICU.
*
* Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
* into one string without dots ("48").
* Since ICU 49, it is the double-digit major ICU version number.
* See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "69"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "69.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================
* ICU collation framework version information
* Version info that can be obtained from a collator is affected by these
* numbers in a secret and magic way. Please use collator version as whole
*===========================================================================
*/
/**
* Collation runtime version (sort key generator, strcoll).
* If the version is different, sort keys for the same string could be different.
* This value may change in subsequent releases of ICU.
* @stable ICU 2.4
*/
#define UCOL_RUNTIME_VERSION 9
/**
* Collation builder code version.
* When this is different, the same tailoring might result
* in assigning different collation elements to code points.
* This value may change in subsequent releases of ICU.
* @stable ICU 2.4
*/
#define UCOL_BUILDER_VERSION 9
#ifndef U_HIDE_DEPRECATED_API
/**
* Constant 1.
* This was intended to be the version of collation tailorings,
* but instead the tailoring data carries a version number.
* @deprecated ICU 54
*/
#define UCOL_TAILORINGS_VERSION 1
#endif /* U_HIDE_DEPRECATED_API */
#endif

View File

@@ -0,0 +1,187 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2000-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
* file name: uversion.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* Created by: Vladimir Weinstein
*
* Gets included by utypes.h and Windows .rc files
*/
/**
* \file
* \brief C API: API for accessing ICU version numbers.
*/
/*===========================================================================*/
/* Main ICU version information */
/*===========================================================================*/
#ifndef UVERSION_H
#define UVERSION_H
#include "unicode/umachine.h"
/* Actual version info lives in uvernum.h */
#include "unicode/uvernum.h"
/** Maximum length of the copyright string.
* @stable ICU 2.4
*/
#define U_COPYRIGHT_STRING_LENGTH 128
/** An ICU version consists of up to 4 numbers from 0..255.
* @stable ICU 2.4
*/
#define U_MAX_VERSION_LENGTH 4
/** In a string, ICU version fields are delimited by dots.
* @stable ICU 2.4
*/
#define U_VERSION_DELIMITER '.'
/** The maximum length of an ICU version string.
* @stable ICU 2.4
*/
#define U_MAX_VERSION_STRING_LENGTH 20
/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
* To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
* @stable ICU 2.4
*/
typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
/*===========================================================================*/
/* C++ namespace if supported. Versioned unless versioning is disabled. */
/*===========================================================================*/
/* Define C++ namespace symbols. */
#ifdef __cplusplus
/**
* \def U_NAMESPACE_BEGIN
* This is used to begin a declaration of a public ICU C++ API within
* versioned-ICU-namespace block.
*
* @stable ICU 2.4
*/
/**
* \def U_NAMESPACE_END
* This is used to end a declaration of a public ICU C++ API.
* It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
*
* @stable ICU 2.4
*/
/**
* \def U_NAMESPACE_USE
* This is used to specify that the rest of the code uses the
* public ICU C++ API namespace.
* @stable ICU 2.4
*/
/**
* \def U_NAMESPACE_QUALIFIER
* This is used to qualify that a function or class is part of
* the public ICU C++ API namespace.
*
* This macro is unnecessary since ICU 49 requires namespace support.
* You can just use "icu::" instead.
* @stable ICU 2.4
*/
# if U_DISABLE_RENAMING
# define U_ICU_NAMESPACE icu
namespace U_ICU_NAMESPACE { }
# else
# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
namespace U_ICU_NAMESPACE { }
namespace icu = U_ICU_NAMESPACE;
# endif
# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
# define U_NAMESPACE_END }
# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
# ifndef U_USING_ICU_NAMESPACE
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
# define U_USING_ICU_NAMESPACE 0
# else
# define U_USING_ICU_NAMESPACE 0
# endif
# endif
# if U_USING_ICU_NAMESPACE
U_NAMESPACE_USE
# endif
#endif /* __cplusplus */
/*===========================================================================*/
/* General version helper functions. Definitions in putil.c */
/*===========================================================================*/
/**
* Parse a string with dotted-decimal version information and
* fill in a UVersionInfo structure with the result.
* Definition of this function lives in putil.c
*
* @param versionArray The destination structure for the version information.
* @param versionString A string with dotted-decimal version information,
* with up to four non-negative number fields with
* values of up to 255 each.
* @stable ICU 2.4
*/
U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray, const char *versionString);
/**
* Parse a Unicode string with dotted-decimal version information and
* fill in a UVersionInfo structure with the result.
* Definition of this function lives in putil.c
*
* @param versionArray The destination structure for the version information.
* @param versionString A Unicode string with dotted-decimal version
* information, with up to four non-negative number
* fields with values of up to 255 each.
* @stable ICU 4.2
*/
U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
/**
* Write a string with dotted-decimal version information according
* to the input UVersionInfo.
* Definition of this function lives in putil.c
*
* @param versionArray The version information to be written as a string.
* @param versionString A string buffer that will be filled in with
* a string corresponding to the numeric version
* information in versionArray.
* The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
* @stable ICU 2.4
*/
U_CAPI void U_EXPORT2
u_versionToString(const UVersionInfo versionArray, char *versionString);
/**
* Gets the ICU release version. The version array stores the version information
* for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
* Definition of this function lives in putil.c
*
* @param versionArray the version # information, the result will be filled in
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray);
#endif

View File

@@ -0,0 +1,13 @@
add_library(
# Name
ime-nlp
# Headers
nlp.h
token.h
suggestion_list.h
# Sources
token.cpp
suggestion_list.cpp
)

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLORISBOARD_NLP_H
#define FLORISBOARD_NLP_H
#include <string>
namespace ime::nlp {

// Type used for the text of a single word.
using word_t = std::string;

// Unsigned 16-bit type used for word frequencies.
using freq_t = uint16_t;

// Bit mask 0xFF over a freq_t.
// NOTE(review): presumably selects the frequency value bits -- confirm against users.
static constexpr freq_t FREQ_VALUE_MASK = 0xFF;

// Bit 0x01 of a freq_t.
// NOTE(review): presumably marks a possibly offensive word; it overlaps
// FREQ_VALUE_MASK -- confirm that this is intended.
static constexpr freq_t FREQ_POSSIBLY_OFFENSIVE = 0x01;

} // namespace ime::nlp
#endif // FLORISBOARD_NLP_H

View File

@@ -0,0 +1,98 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggestion_list.h"
#include <utility>
using namespace ime::nlp;
// Creates an empty list that can hold up to `_maxSize` suggestions.
// The backing vector is pre-filled with `_maxSize` default-constructed tokens;
// `internalSize` tracks how many of those slots are in use.
//
// The initializers are written in the order the members are declared in the
// class (isPrimaryTokenAutoInsert, tokens, internalSize, maxSize). C++ always
// initializes members in declaration order, so listing them in a different
// order is misleading and triggers -Wreorder.
SuggestionList::SuggestionList(size_t _maxSize) :
    isPrimaryTokenAutoInsert(false), tokens(_maxSize), internalSize(0), maxSize(_maxSize)
{ }

// No manual cleanup is needed; the members release their own storage.
SuggestionList::~SuggestionList() = default;
// Offers `word` with frequency `freq` to the list.
//
// - If the word is already present, its stored frequency is raised to `freq`
//   when `freq` is strictly higher; otherwise nothing changes.
// - If the word is new and there is free capacity, it is appended.
// - If the word is new and the list is full, it replaces the last entry
//   (the one with the lowest frequency after sorting) when `freq` is strictly
//   higher than that entry's frequency.
//
// After every modification the used range is re-sorted by descending
// frequency.
//
// Returns true when the list was modified, false when the offer was rejected.
bool SuggestionList::add(word_t &&word, freq_t &&freq) {
    const auto entryIndex = indexOfWord(word);
    if (entryIndex.has_value()) {
        // Bind by reference: a by-value copy would receive the new frequency
        // and then be discarded, leaving the stored token unchanged.
        auto &entry = tokens[entryIndex.value()];
        if (entry.freq >= freq) {
            return false;
        }
        entry.freq = freq;
    } else if (internalSize < maxSize) {
        tokens[internalSize++] = WeightedToken(std::move(word), freq);
    } else {
        if (internalSize == 0) {
            // Only reachable when maxSize == 0: there is no slot to replace,
            // and `internalSize - 1` would wrap around.
            return false;
        }
        auto &last = tokens[internalSize - 1];
        if (last.freq >= freq) {
            return false;
        }
        last = WeightedToken(std::move(word), freq);
    }
    std::sort(tokens.begin(), tokens.begin() + internalSize, std::greater<>());
    return true;
}
// Resets the list to its empty state: drops the auto-insert flag and marks
// zero slots as used. The backing vector itself is left untouched.
void SuggestionList::clear() {
    isPrimaryTokenAutoInsert = false;
    internalSize = 0;
}
// True when a token equal to `element` is stored in the used part of the list.
bool SuggestionList::contains(const WeightedToken &element) const {
    const auto position = indexOf(element);
    return position.has_value();
}
// True when some stored token carries exactly the text `word`.
bool SuggestionList::containsWord(const word_t &word) const {
    const auto position = indexOfWord(word);
    return position.has_value();
}
// Returns a pointer to the token at `index`, or nullptr when `index` is not
// below the number of stored tokens. The pointer refers to storage owned by
// this list.
const WeightedToken *SuggestionList::get(size_t index) const {
    // `index` is unsigned, so a lower-bound check (`index < 0`) can never be
    // true; only the upper bound has to be validated.
    if (index >= internalSize) {
        return nullptr;
    }
    return &tokens[index];
}
// Linear search over the used slots for a token equal to `element`.
// Returns the position of the first match, or std::nullopt when none exists.
std::optional<size_t> SuggestionList::indexOf(const WeightedToken &element) const {
    size_t pos = 0;
    while (pos < internalSize) {
        if (element == tokens[pos]) {
            return pos;
        }
        ++pos;
    }
    return std::nullopt;
}
// Linear search over the used slots for a token whose text equals `word`.
// Returns the position of the first match, or std::nullopt when none exists.
std::optional<size_t> SuggestionList::indexOfWord(const word_t &word) const {
    size_t pos = 0;
    while (pos < internalSize) {
        if (word == tokens[pos].data) {
            return pos;
        }
        ++pos;
    }
    return std::nullopt;
}
bool SuggestionList::isEmpty() const {
return internalSize == 0;
}
size_t SuggestionList::size() const {
return internalSize;
}

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLORISBOARD_SUGGESTION_LIST_H
#define FLORISBOARD_SUGGESTION_LIST_H
#include <optional>
#include <vector>
#include "token.h"
namespace ime::nlp {
// Fixed-capacity list of weighted word suggestions. add() re-sorts the used
// slots by descending frequency after each successful insertion.
class SuggestionList {
public:
// Creates an empty list with room for _maxSize suggestions.
SuggestionList(size_t _maxSize);
~SuggestionList();
// Offers a word with a frequency. Returns false when the offer is rejected:
// the word is already stored with an equal or higher frequency, or the list
// is full and the last entry's frequency is not lower than freq.
bool add(word_t &&word, freq_t &&freq);
// Marks the list as empty and resets isPrimaryTokenAutoInsert to false.
void clear();
// True when a token equal to element (per WeightedToken operator==) is stored.
bool contains(const WeightedToken &element) const;
// True when a stored token has exactly this text.
bool containsWord(const word_t &word) const;
// Pointer to the token at index, or nullptr when index >= size().
const WeightedToken *get(size_t index) const;
// Position of the first stored token equal to element, or std::nullopt.
std::optional<size_t> indexOf(const WeightedToken &element) const;
// Position of the first stored token whose text equals word, or std::nullopt.
std::optional<size_t> indexOfWord(const word_t &word) const;
// True when size() == 0.
bool isEmpty() const;
// Number of stored suggestions.
size_t size() const;
// Public flag; initialized to false and reset by clear().
// NOTE(review): presumably tells the caller that the first suggestion should be
// inserted automatically -- confirm with the code that sets it.
bool isPrimaryTokenAutoInsert;
private:
// Members are initialized in this declaration order, regardless of the order
// used in the constructor's initializer list.
// Backing storage, sized to maxSize at construction.
std::vector<WeightedToken> tokens;
// Number of leading slots of tokens that are in use.
size_t internalSize;
// Capacity of the list.
size_t maxSize;
};
} // namespace ime::nlp
#endif // FLORISBOARD_SUGGESTION_LIST_H

View File

@@ -0,0 +1,61 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "token.h"
#include <utility>
namespace ime::nlp {
Token::Token() : data() {}
Token::Token(word_t &&_data) : data(std::move(_data)) {}
bool operator==(const Token &t1, const Token &t2) {
return t1.data == t2.data;
}
bool operator!=(const Token &t1, const Token &t2) {
return !(t1 == t2);
}
WeightedToken::WeightedToken() : Token(), freq(0) {}
WeightedToken::WeightedToken(word_t &&_data, freq_t _freq) : Token(std::move(_data)), freq(_freq) {}
bool operator==(const WeightedToken &t1, const WeightedToken &t2) {
return t1.data == t2.data && t1.freq == t2.freq;
}
bool operator!=(const WeightedToken &t1, const WeightedToken &t2) {
return !(t1 == t2);
}
bool operator<(const WeightedToken &t1, const WeightedToken &t2) {
return t1.freq < t2.freq;
}
bool operator<=(const WeightedToken &t1, const WeightedToken &t2) {
return t1.freq <= t2.freq;
}
bool operator>(const WeightedToken &t1, const WeightedToken &t2) {
return t1.freq > t2.freq;
}
bool operator>=(const WeightedToken &t1, const WeightedToken &t2) {
return t1.freq >= t2.freq;
}
} // namespace ime::nlp

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLORISBOARD_TOKEN_H
#define FLORISBOARD_TOKEN_H
#include "nlp.h"
#include <string>
namespace ime::nlp {
// A single word token. Equality compares the text only.
class Token {
public:
// Text of the token.
word_t data;
// Creates a token with empty text.
Token();
// Creates a token by moving _data into it.
Token(word_t &&_data);
// t1 == t2 when both hold the same text.
friend bool operator==(const Token &t1, const Token &t2);
friend bool operator!=(const Token &t1, const Token &t2);
};
// A Token with an attached frequency.
// Note that equality and ordering use different keys: == / != compare text
// and frequency, while < <= > >= compare the frequency only.
class WeightedToken : public Token {
public:
// Frequency attached to the token.
freq_t freq;
// Creates a token with empty text and frequency 0.
WeightedToken();
// Creates a token by moving _data into it and storing _freq.
WeightedToken(word_t &&_data, freq_t _freq);
// Equal when both text and frequency match.
friend bool operator==(const WeightedToken &t1, const WeightedToken &t2);
friend bool operator!=(const WeightedToken &t1, const WeightedToken &t2);
// Ordering by frequency only.
friend bool operator<(const WeightedToken &t1, const WeightedToken &t2);
friend bool operator<=(const WeightedToken &t1, const WeightedToken &t2);
friend bool operator>(const WeightedToken &t1, const WeightedToken &t2);
friend bool operator>=(const WeightedToken &t1, const WeightedToken &t2);
};
} // namespace ime::nlp
#endif // FLORISBOARD_TOKEN_H

View File

@@ -0,0 +1,10 @@
add_library(
# Name
ime-spelling
# Headers
spellingdict.h
# Sources
spellingdict.cpp
)

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "spellingdict.h"
#include "utils/log.h"
using namespace ime::spellcheck;
SpellingDict::SpellingDict(const nuspell::Dictionary& dict) : dictionary(std::make_unique<nuspell::Dictionary>(dict))
{ }
SpellingDict::~SpellingDict() = default;
// Loads a Nuspell dictionary from `basePath` and wraps it in a SpellingDict
// allocated with `new`.
//
// Returns nullptr when loading fails. The failure is logged and no exception
// leaves this function from the load/allocation step.
SpellingDict* SpellingDict::load(const std::string &basePath) {
    utils::start_stdout_stderr_logger("spell-floris");
    SpellingDict* loaded = nullptr;
    try {
        auto parsed = nuspell::Dictionary::load_from_path(basePath);
        loaded = new SpellingDict(parsed);
    } catch (const nuspell::Dictionary_Loading_Error& loadError) {
        utils::log_error("SpellingDict.load()", loadError.what());
    } catch (...) {
        utils::log_error("SpellingDict.load()", "An unknown error occurred!");
    }
    return loaded;
}
// Forwards `word` to the wrapped dictionary's spell() and returns its verdict.
bool SpellingDict::spell(const std::string& word) {
    return dictionary->spell(word);
}
// Asks the wrapped dictionary for suggestions for `word` and returns them.
// The vector starts out empty and is filled by the dictionary's suggest().
std::vector<std::string> SpellingDict::suggest(const std::string &word) {
    std::vector<std::string> suggestions;
    dictionary->suggest(word, suggestions);
    return suggestions;
}

View File

@@ -0,0 +1,42 @@
/*
* Copyright (C) 2021 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLORISBOARD_SPELLINGDICT_H
#define FLORISBOARD_SPELLINGDICT_H
#include "nuspell/dictionary.hxx"
#include <string>
#include <vector>
namespace ime::spellcheck {
// Thin wrapper around a nuspell::Dictionary that this object owns.
class SpellingDict {
public:
// Stores a heap-allocated copy of dict.
SpellingDict(const nuspell::Dictionary& dict);
~SpellingDict();
// Loads a dictionary from basePath and returns a SpellingDict allocated with
// new, or nullptr when loading fails (the error is logged).
static SpellingDict* load(const std::string& basePath);
// Forwards to nuspell::Dictionary::spell() for the given word.
bool spell(const std::string& word);
// Returns the suggestions produced by nuspell::Dictionary::suggest() for word.
std::vector<std::string> suggest(const std::string& word);
private:
// The wrapped dictionary.
std::unique_ptr<nuspell::Dictionary> dictionary;
};
} // namespace ime::spellcheck
#endif // FLORISBOARD_SPELLINGDICT_H

View File

@@ -0,0 +1,61 @@
add_library(nuspell
aff_data.cxx aff_data.hxx
checker.cxx checker.hxx
suggester.cxx suggester.hxx
dictionary.cxx dictionary.hxx
finder.cxx finder.hxx
unicode.hxx
utils.cxx utils.hxx
structures.hxx)
add_library(Nuspell::nuspell ALIAS nuspell)
#include(GenerateExportHeader)
#generate_export_header(nuspell)
#set(nuspell_headers aff_data.hxx checker.hxx suggester.hxx dictionary.hxx
# finder.hxx structures.hxx unicode.hxx
# ${CMAKE_CURRENT_BINARY_DIR}/nuspell_export.h)
#[[set_target_properties(nuspell PROPERTIES
PUBLIC_HEADER "${nuspell_headers}"
VERSION ${PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR}
CXX_VISIBILITY_PRESET hidden)]]
#target_compile_features(nuspell PUBLIC cxx_std_17)
#[[target_include_directories(nuspell
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
target_link_libraries(nuspell PUBLIC ICU::uc ICU::data)
add_executable(nuspell-bin main.cxx)
set_target_properties(nuspell-bin PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ../tools
RUNTIME_OUTPUT_NAME nuspell)
target_compile_definitions(nuspell-bin PRIVATE
PROJECT_VERSION=\"${PROJECT_VERSION}\")
target_link_libraries(nuspell-bin nuspell)
if (BUILD_SHARED_LIBS AND WIN32)
# This should be PRE_LINK (or PRE_BUILD), so Vcpkg's POST_BUILD
# step (see VCPKG_APPLOCAL_DEPS) that copies dll can pick up nuspell.dll
# inside the folder ../tools.
add_custom_command(TARGET nuspell-bin PRE_LINK
COMMAND ${CMAKE_COMMAND} -E copy_if_different
$<TARGET_FILE:nuspell> $<TARGET_FILE_DIR:nuspell-bin>)
endif()
if (NOT subproject)
install(TARGETS nuspell
EXPORT NuspellTargets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/nuspell)
install(EXPORT NuspellTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/nuspell
NAMESPACE Nuspell::)
install(TARGETS nuspell-bin DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()]]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,173 @@
/* Copyright 2016-2021 Dimitrij Mijoski
*
* This file is part of Nuspell.
*
* Nuspell is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Nuspell is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NUSPELL_AFF_DATA_HXX
#define NUSPELL_AFF_DATA_HXX
#include "nuspell_export.h"
#include "structures.hxx"
#include <iosfwd>
#include <unicode/locid.h>
namespace nuspell {
inline namespace v5 {
/**
 * @brief Name of a character encoding.
 *
 * Every constructor and assignment operator that takes a name stores it and
 * then calls normalize_name(), so the stored value is always the result of
 * that normalization. A default-constructed Encoding has an empty name.
 */
class Encoding {
    std::string name;

    /**
     * Rewrites `name` in place. Defined out of line; the exact normalization
     * rules are not visible in this header.
     */
    NUSPELL_EXPORT auto normalize_name() -> void;

  public:
    /** Coarse classification; convertible from bool (UTF8 == true). */
    enum Enc_Type { SINGLEBYTE = false, UTF8 = true };

    Encoding() = default;
    explicit Encoding(const std::string& e) : name(e) { normalize_name(); }
    // std::move is qualified explicitly instead of relying on
    // argument-dependent lookup to find it through the std::string argument.
    explicit Encoding(std::string&& e) : name(std::move(e))
    {
        normalize_name();
    }
    explicit Encoding(const char* e) : name(e) { normalize_name(); }

    auto& operator=(const std::string& e)
    {
        name = e;
        normalize_name();
        return *this;
    }
    auto& operator=(std::string&& e)
    {
        name = std::move(e);
        normalize_name();
        return *this;
    }
    auto& operator=(const char* e)
    {
        name = e;
        normalize_name();
        return *this;
    }

    /** True when no encoding name is stored. */
    auto empty() const { return name.empty(); }

    /** The stored (normalized) name; may be empty. */
    auto& value() const { return name; }

    /** True when the stored name is exactly "UTF-8". */
    auto is_utf8() const { return name == "UTF-8"; }

    /** The stored name, or "ISO8859-1" when no name is stored. */
    auto value_or_default() const -> std::string
    {
        if (name.empty())
            return "ISO8859-1";
        else
            return name;
    }

    /** UTF8 when is_utf8() holds, SINGLEBYTE otherwise. */
    operator Enc_Type() const { return is_utf8() ? UTF8 : SINGLEBYTE; }
};
enum class Flag_Type { SINGLE_CHAR, DOUBLE_CHAR, NUMBER, UTF8 };
/**
* @internal
* @brief Map between words and word_flags.
*
* Flags are stored as part of the container. Maybe for the future flags should
* be stored elsewhere (flag aliases) and this should store pointers.
*
* Does not store morphological data as is low priority feature and is out of
* scope.
*/
using Word_List = Hash_Multimap<std::string, Flag_Set>;
/**
 * @brief Plain data holder for everything read by parse_aff() and parse_dic().
 *
 * The struct has no user-provided constructor; Checker value-initializes it
 * so that the scalar members start out zeroed. Member order is significant
 * for layout, so members must not be reordered casually.
 */
struct Aff_Data {
// char16_t(-1), i.e. the largest char16_t value.
static constexpr auto HIDDEN_HOMONYM_FLAG = char16_t(-1);
static constexpr auto MAX_SUGGESTIONS = size_t(16);
// spell checking options
Word_List words;
Prefix_Table prefixes;
Suffix_Table suffixes;
bool complex_prefixes;
bool fullstrip;
bool checksharps;
bool forbid_warn;
char16_t compound_onlyin_flag;
char16_t circumfix_flag;
char16_t forbiddenword_flag;
char16_t keepcase_flag;
char16_t need_affix_flag;
char16_t warn_flag;
// compounding options
char16_t compound_flag;
char16_t compound_begin_flag;
char16_t compound_last_flag;
char16_t compound_middle_flag;
Compound_Rule_Table compound_rules;
// spell checking options
Break_Table break_table;
Substr_Replacer input_substr_replacer;
std::string ignored_chars;
icu::Locale icu_locale;
Substr_Replacer output_substr_replacer;
// suggestion options
Replacement_Table replacements;
std::vector<Similarity_Group> similarities;
std::string keyboard_closeness;
std::string try_chars;
// Phonetic_Table phonetic_table;
char16_t nosuggest_flag;
char16_t substandard_flag;
unsigned short max_compound_suggestions;
unsigned short max_ngram_suggestions;
unsigned short max_diff_factor;
bool only_max_diff;
bool no_split_suggestions;
bool suggest_with_dots;
// compounding options
unsigned short compound_min_length;
unsigned short compound_max_word_count;
char16_t compound_permit_flag;
char16_t compound_forbid_flag;
char16_t compound_root_flag;
char16_t compound_force_uppercase;
bool compound_more_suffixes;
bool compound_check_duplicate;
bool compound_check_rep;
bool compound_check_case;
bool compound_check_triple;
bool compound_simplified_triple;
bool compound_syllable_num;
unsigned short compound_syllable_max;
std::string compound_syllable_vowels;
std::vector<Compound_Pattern> compound_patterns;
// data members used only while parsing
Flag_Type flag_type;
Encoding encoding;
std::vector<Flag_Set> flag_aliases;
std::string wordchars; // deprecated?
// Parsers for the two input streams; both report success as bool and are
// defined out of line.
auto parse_aff(std::istream& in) -> bool;
auto parse_dic(std::istream& in) -> bool;
// Parses the affix stream first and the dictionary stream only if that
// succeeded. Returns false as soon as either step fails.
auto parse_aff_dic(std::istream& aff, std::istream& dic)
{
if (parse_aff(aff))
return parse_dic(dic);
return false;
}
};
} // namespace v5
} // namespace nuspell
#endif // NUSPELL_AFF_DATA_HXX

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,352 @@
/* Copyright 2016-2021 Dimitrij Mijoski
*
* This file is part of Nuspell.
*
* Nuspell is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Nuspell is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NUSPELL_CHECKER_HXX
#define NUSPELL_CHECKER_HXX
#include "aff_data.hxx"
namespace nuspell {
inline namespace v5 {
/**
 * Template argument that selects which validity rules the affix-stripping
 * routines of Checker apply (see Checker::affix_NOT_valid, which treats
 * FULL_WORD differently from the AT_COMPOUND_* values).
 * NOTE(review): presumably the values name the position of the checked part
 * inside a compound word -- confirm against the implementation file.
 */
enum Affixing_Mode {
FULL_WORD,
AT_COMPOUND_BEGIN,
AT_COMPOUND_END,
AT_COMPOUND_MIDDLE
};
/**
 * Common part of every affix-stripping result: a pointer to the matched
 * word-list entry. A value-initialized pointer means "no match", which is
 * what the implicit conversion to the pointer type exposes.
 */
struct Affixing_Result_Base {
    Word_List::const_pointer root_word{};

    /** Pointer-like access to the matched entry. */
    auto operator->() const { return root_word; }
    auto& operator*() const { return *root_word; }

    /** Implicit conversion so the result can be tested like a pointer. */
    operator Word_List::const_pointer() const { return root_word; }
};
template <class T1 = void, class T2 = void>
struct Affixing_Result : Affixing_Result_Base {
const T1* a = {};
const T2* b = {};
Affixing_Result() = default;
Affixing_Result(Word_List::const_reference r, const T1& a, const T2& b)
: Affixing_Result_Base{&r}, a{&a}, b{&b}
{
}
};
template <class T1>
struct Affixing_Result<T1, void> : Affixing_Result_Base {
const T1* a = {};
Affixing_Result() = default;
Affixing_Result(Word_List::const_reference r, const T1& a)
: Affixing_Result_Base{&r}, a{&a}
{
}
};
template <>
struct Affixing_Result<void, void> : Affixing_Result_Base {
Affixing_Result() = default;
Affixing_Result(Word_List::const_reference r) : Affixing_Result_Base{&r}
{
}
};
/**
 * Result of a compound check: the matched word-list entry plus modifiers
 * produced along the way. All members default to zero / null / false.
 */
struct Compounding_Result {
    Word_List::const_pointer word_entry{};
    unsigned char num_words_modifier = 0;
    signed char num_syllable_modifier = 0;
    bool affixed_and_modified = false; /**< non-zero affix */

    /** Pointer-like access to the matched entry. */
    auto operator->() const { return word_entry; }
    auto& operator*() const { return *word_entry; }

    /** Implicit conversion so the result can be tested like a pointer. */
    operator Word_List::const_pointer() const { return word_entry; }
};
/**
 * @brief Spell-checking routines layered on top of the parsed Aff_Data.
 *
 * Every member function declared here is const. The strip_* templates take an
 * Affixing_Mode template argument and return an Affixing_Result
 * specialization; the check_compound* functions return a Compounding_Result.
 * Only a few small templates are defined in this header; the remaining
 * definitions are not visible here.
 */
struct Checker : public Aff_Data {
enum Forceucase : bool {
FORBID_BAD_FORCEUCASE = false,
ALLOW_BAD_FORCEUCASE = true
};
enum Hidden_Homonym : bool {
ACCEPT_HIDDEN_HOMONYM = false,
SKIP_HIDDEN_HOMONYM = true
};
Checker()
: Aff_Data() // we explicitly do value init so content is zeroed
{
}
auto spell_priv(std::string& s) const -> bool;
auto spell_break(std::string& s, size_t depth = 0) const -> bool;
auto spell_casing(std::string& s) const -> const Flag_Set*;
auto spell_casing_upper(std::string& s) const -> const Flag_Set*;
auto spell_casing_title(std::string& s) const -> const Flag_Set*;
auto spell_sharps(std::string& base, size_t n_pos = 0, size_t n = 0,
size_t rep = 0) const -> const Flag_Set*;
auto check_word(std::string& s, Forceucase allow_bad_forceucase = {},
Hidden_Homonym skip_hidden_homonym = {}) const
-> const Flag_Set*;
auto check_simple_word(std::string& word,
Hidden_Homonym skip_hidden_homonym = {}) const
-> const Flag_Set*;
template <Affixing_Mode m>
auto affix_NOT_valid(const Prefix& a) const;
template <Affixing_Mode m>
auto affix_NOT_valid(const Suffix& a) const;
template <Affixing_Mode m, class AffixT>
auto outer_affix_NOT_valid(const AffixT& a) const;
template <class AffixT>
auto is_circumfix(const AffixT& a) const;
template <Affixing_Mode m>
auto is_valid_inside_compound(const Flag_Set& flags) const;
template <Affixing_Mode m = FULL_WORD>
auto strip_prefix_only(std::string& s,
Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<Prefix>;
template <Affixing_Mode m = FULL_WORD>
auto strip_suffix_only(std::string& s,
Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<Suffix>;
template <Affixing_Mode m = FULL_WORD>
auto
strip_prefix_then_suffix(std::string& s,
Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<Suffix, Prefix>;
template <Affixing_Mode m>
auto strip_pfx_then_sfx_2(const Prefix& pe, std::string& s,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<Suffix, Prefix>;
template <Affixing_Mode m = FULL_WORD>
auto
strip_suffix_then_prefix(std::string& s,
Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<Prefix, Suffix>;
template <Affixing_Mode m>
auto strip_sfx_then_pfx_2(const Suffix& se, std::string& s,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<Prefix, Suffix>;
template <Affixing_Mode m = FULL_WORD>
auto strip_prefix_then_suffix_commutative(
std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<Suffix, Prefix>;
template <Affixing_Mode m = FULL_WORD>
auto strip_pfx_then_sfx_comm_2(const Prefix& pe, std::string& word,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<Suffix, Prefix>;
template <Affixing_Mode m = FULL_WORD>
auto
strip_suffix_then_suffix(std::string& s,
Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<Suffix, Suffix>;
template <Affixing_Mode m>
auto strip_sfx_then_sfx_2(const Suffix& se1, std::string& s,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<Suffix, Suffix>;
template <Affixing_Mode m = FULL_WORD>
auto
strip_prefix_then_prefix(std::string& s,
Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<Prefix, Prefix>;
template <Affixing_Mode m>
auto strip_pfx_then_pfx_2(const Prefix& pe1, std::string& s,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<Prefix, Prefix>;
template <Affixing_Mode m = FULL_WORD>
auto strip_prefix_then_2_suffixes(
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<>;
template <Affixing_Mode m>
auto strip_pfx_2_sfx_3(const Prefix& pe1, const Suffix& se1,
std::string& s,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<>;
template <Affixing_Mode m = FULL_WORD>
auto strip_suffix_prefix_suffix(
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<>;
template <Affixing_Mode m>
auto strip_s_p_s_3(const Suffix& se1, const Prefix& pe1,
std::string& word,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<>;
template <Affixing_Mode m = FULL_WORD>
auto strip_2_suffixes_then_prefix(
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<>;
template <Affixing_Mode m>
auto strip_2_sfx_pfx_3(const Suffix& se1, const Suffix& se2,
std::string& word,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<>;
template <Affixing_Mode m = FULL_WORD>
auto strip_suffix_then_2_prefixes(
std::string& s, Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<>;
template <Affixing_Mode m>
auto strip_sfx_2_pfx_3(const Suffix& se1, const Prefix& pe1,
std::string& s,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<>;
template <Affixing_Mode m = FULL_WORD>
auto strip_prefix_suffix_prefix(
std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<>;
template <Affixing_Mode m>
auto strip_p_s_p_3(const Prefix& pe1, const Suffix& se1,
std::string& word,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<>;
template <Affixing_Mode m = FULL_WORD>
auto strip_2_prefixes_then_suffix(
std::string& word, Hidden_Homonym skip_hidden_homonym = {}) const
-> Affixing_Result<>;
template <Affixing_Mode m>
auto strip_2_pfx_sfx_3(const Prefix& pe1, const Prefix& pe2,
std::string& word,
Hidden_Homonym skip_hidden_homonym) const
-> Affixing_Result<>;
auto check_compound(std::string& word,
Forceucase allow_bad_forceucase) const
-> Compounding_Result;
template <Affixing_Mode m = AT_COMPOUND_BEGIN>
auto check_compound(std::string& word, size_t start_pos,
size_t num_part, std::string& part,
Forceucase allow_bad_forceucase) const
-> Compounding_Result;
template <Affixing_Mode m = AT_COMPOUND_BEGIN>
auto check_compound_classic(std::string& word, size_t start_pos,
size_t i, size_t num_part,
std::string& part,
Forceucase allow_bad_forceucase) const
-> Compounding_Result;
template <Affixing_Mode m = AT_COMPOUND_BEGIN>
auto check_compound_with_pattern_replacements(
std::string& word, size_t start_pos, size_t i, size_t num_part,
std::string& part, Forceucase allow_bad_forceucase) const
-> Compounding_Result;
template <Affixing_Mode m>
auto check_word_in_compound(std::string& s) const -> Compounding_Result;
auto calc_num_words_modifier(const Prefix& pfx) const -> unsigned char;
template <Affixing_Mode m>
auto calc_syllable_modifier(Word_List::const_reference we) const
-> signed char;
template <Affixing_Mode m>
auto calc_syllable_modifier(Word_List::const_reference we,
const Suffix& sfx) const -> signed char;
auto count_syllables(std::string_view word) const -> size_t;
auto check_compound_with_rules(std::string& word,
std::vector<const Flag_Set*>& words_data,
size_t start_pos, std::string& part,
Forceucase allow_bad_forceucase) const
-> Compounding_Result;
auto is_rep_similar(std::string& word) const -> bool;
};
/**
 * Tells whether prefix `e` must be rejected in affixing mode `m`.
 *
 * A prefix is rejected when any of these holds:
 *  - mode is FULL_WORD and the prefix carries compound_onlyin_flag;
 *  - mode is AT_COMPOUND_END and the prefix lacks compound_permit_flag;
 *  - mode is not FULL_WORD and the prefix carries compound_forbid_flag.
 */
template <Affixing_Mode m>
auto Checker::affix_NOT_valid(const Prefix& e) const
{
    const auto& flags = e.cont_flags;
    return (m == FULL_WORD && flags.contains(compound_onlyin_flag)) ||
           (m == AT_COMPOUND_END &&
            !flags.contains(compound_permit_flag)) ||
           (m != FULL_WORD && flags.contains(compound_forbid_flag));
}
/**
 * Tells whether suffix `e` must be rejected in affixing mode `m`.
 *
 * A suffix is rejected when any of these holds:
 *  - mode is FULL_WORD and the suffix carries compound_onlyin_flag;
 *  - mode is AT_COMPOUND_BEGIN and the suffix lacks compound_permit_flag;
 *  - mode is not FULL_WORD and the suffix carries compound_forbid_flag.
 */
template <Affixing_Mode m>
auto Checker::affix_NOT_valid(const Suffix& e) const
{
    const auto& flags = e.cont_flags;
    return (m == FULL_WORD && flags.contains(compound_onlyin_flag)) ||
           (m == AT_COMPOUND_BEGIN &&
            !flags.contains(compound_permit_flag)) ||
           (m != FULL_WORD && flags.contains(compound_forbid_flag));
}
/**
 * Rejection test for an outermost affix: it is rejected when
 * affix_NOT_valid<m>() rejects it, or when it carries need_affix_flag.
 */
template <Affixing_Mode m, class AffixT>
auto Checker::outer_affix_NOT_valid(const AffixT& e) const
{
    return affix_NOT_valid<m>(e) ||
           e.cont_flags.contains(need_affix_flag);
}
/** Reports whether affix `a` carries circumfix_flag in its continuation flags. */
template <class AffixT>
auto Checker::is_circumfix(const AffixT& a) const
{
    const auto& flags = a.cont_flags;
    return flags.contains(circumfix_flag);
}
/**
 * Checks that the inner affix lists the outer affix's flag among its
 * continuation flags.
 */
template <class AffixInner, class AffixOuter>
auto cross_valid_inner_outer(const AffixInner& inner, const AffixOuter& outer)
{
    const auto& permitted = inner.cont_flags;
    return permitted.contains(outer.flag);
}
/** Checks that the word's flag set contains the flag of affix `afx`. */
template <class Affix>
auto cross_valid_inner_outer(const Flag_Set& word_flags, const Affix& afx)
{
    const auto wanted = afx.flag;
    return word_flags.contains(wanted);
}
} // namespace v5
} // namespace nuspell
#endif // NUSPELL_CHECKER_HXX

View File

@@ -0,0 +1 @@
clang-format -style=file -i *.[ch]xx

Some files were not shown because too many files have changed in this diff Show More