Add IME model module

This module contains all data objects that are relevant to both the plugin module and the core app module.
This commit is contained in:
Patrick Goldinger
2023-05-26 01:39:09 +02:00
parent dded3dddc9
commit 3bf8264d0b
9 changed files with 460 additions and 43 deletions

View File

@@ -0,0 +1,69 @@
/*
* Copyright 2023 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Suppress needed until https://youtrack.jetbrains.com/issue/KTIJ-19369 is fixed
@file:Suppress("DSL_SCOPE_VIOLATION")
// Build plugins for this module; each one is referenced through an alias in the `libs` version catalog.
plugins {
alias(libs.plugins.agp.library)
alias(libs.plugins.kotlin.android)
alias(libs.plugins.kotlin.serialization)
}
// Build parameters read from project properties through the `by project` delegate.
// NOTE(review): projectVersionName and projectVersionNameSuffix are not referenced in the visible part of this
// script - confirm whether they are still needed.
val projectCompileSdk: String by project
val projectMinSdk: String by project
val projectVersionName: String by project
val projectVersionNameSuffix: String by project
android {
namespace = "dev.patrickgold.florisboard.ime"
// The SDK levels arrive as strings from the project properties and are converted to integers here.
compileSdk = projectCompileSdk.toInt()
defaultConfig {
minSdk = projectMinSdk.toInt()
}
// Java sources and Kotlin output both target Java 8 (1.8).
compileOptions {
sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8
}
kotlinOptions {
jvmTarget = "1.8"
}
// Registers src/main/kotlin as an additional source directory of the "main" source set.
sourceSets {
maybeCreate("main").apply {
java.srcDir("src/main/kotlin")
}
}
}
dependencies {
implementation(libs.kotlinx.serialization.json)
}
// Every task of type Test is configured to run on the JUnit Platform.
tasks.withType<Test> {
useJUnitPlatform()
}
/*val sourcesJar = tasks.register<Jar>("sourcesJar") {
archiveClassifier.set("sources")
from(android.sourceSets.getByName("main").java.srcDirs)
}
mavenPublishing {
coordinates(projectGroupId, artifactId, projectVersion)
}*/

View File

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
~ Copyright 2023 Patrick Goldinger
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<manifest />

View File

@@ -0,0 +1,37 @@
/*
* Copyright (C) 2023 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.patrickgold.florisboard.ime.core
import kotlinx.serialization.Serializable
/**
 * Serializable value holder describing a computed, user-specified combination of language and layout.
 *
 * @property id The ID of this subtype. Negative IDs mark a fallback subtype, see [isFallback].
 * @property primaryLocale The primary locale tag of this subtype.
 * @property secondaryLocales The secondary locale tags of this subtype. May be an empty list.
 */
@Serializable
data class ComputedSubtype(
    val id: Long,
    val primaryLocale: String,
    val secondaryLocales: List<String>,
) {
    /**
     * Tells whether this subtype is a fallback subtype.
     *
     * @return True if [id] is negative, false otherwise.
     */
    fun isFallback(): Boolean = id < 0
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Patrick Goldinger
* Copyright (C) 2023 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,9 @@ package dev.patrickgold.florisboard.ime.input
/**
* Enum for the input shift states of a text keyboard.
*
* Note: This class MUST be kept in sync with the C++ implementation:
* https://github.com/florisboard/nlp/blob/main/nlpcore/src/common/suggestion.cppm
*/
enum class InputShiftState(val value: Int) {
/**

View File

@@ -0,0 +1,195 @@
/*
* Copyright (C) 2022 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.patrickgold.florisboard.ime.nlp
import dev.patrickgold.florisboard.ime.core.ComputedSubtype
/**
* Base interface for any NLP provider implementation. NLP providers maintain their own internal state and only receive
* limited events, such as [create], [preload], [destroy] and group-specific requests.
*
* Providers should NEVER do heavy work in the initialization phase of the object. One-time setup work belongs in
* [create], and locale-specific loading of dictionaries and language models belongs in [preload].
*/
interface NlpProvider {
/**
* Is called exactly once before any [preload] or task-specific requests. Use it for one-time setup, such as
* establishing necessary native bindings, threads, etc.
*/
fun create()
/**
* Is called at least once before a task-specific request occurs, to allow for locale-specific preloading of
* dictionaries and language models.
*
* @param subtype Information about the subtype to preload, primarily used for getting the primary and secondary
* language for correct dictionary selection.
*/
fun preload(subtype: ComputedSubtype)
/**
* Is called when the provider is no longer needed and should be destroyed. Any native allocations should be freed
* and any asynchronous tasks/threads must be stopped. After this method call finishes, this provider object is
* considered dead and will be queued to be cleaned up by the GC in the next round.
*/
fun destroy()
}
/**
* Interface for an NLP provider specializing in spell check services.
*/
interface SpellingProvider : NlpProvider {
/**
* Spell check the given [word] in the primary (and, if defined, secondary) language of the subtype identified by
* [subtypeId], and return a spelling result. If the given word is spelled correctly, a spelling result with no
* suggestions should be returned.
*
* Spell check requests are considered to be read-only and should at no point be used to train the underlying
* language model and/or weights in the dictionary.
*
* @param subtypeId The ID of the subtype this request is for. It is guaranteed to match one of the subtype IDs
* which have been passed to [preload].
* @param flags The suggestion request flags.
* @param word The word to spell check, may contain any valid Unicode code point.
* @param precedingWords List of preceding words, which allows for a more context-based spell check. This list can
* also be empty, if no surrounding context can be provided.
* @param followingWords List of following words, which allows for a more context-based spell check. This list can
* also be empty, if no surrounding context can be provided.
*
* @return A spelling result object, which indicates both the validity of this word and, if needed, suggested
* corrections for the misspelled word.
*/
fun spell(
subtypeId: Long,
flags: SuggestionRequestFlags,
word: String,
precedingWords: List<String>,
followingWords: List<String>,
): SpellingResult
}
/**
* Interface for an NLP provider specializing in next/current-word suggestion and autocorrect services.
*/
interface SuggestionProvider : NlpProvider {
/**
* Callback from the editor logic that the editor content has changed and that new suggestions should be generated
* for the new user input. There is no guarantee that the returned candidates are actually used, as there may be
* sudden context changes or clipboard/emoji suggestions overriding the results (if the user has those enabled).
*
* @param subtypeId The ID of the subtype this request is for. It is guaranteed to match one of the subtype IDs
* which have been passed to [preload].
* @param flags The suggestion request flags.
* @param word The current word to use as a base for word prediction, may contain any valid Unicode code point.
* @param precedingWords List of preceding words, which allows for a more context-based word prediction. This list
* can also be empty, if no surrounding context can be provided.
* @param followingWords List of following words, which allows for a more context-based word prediction. This list
* can also be empty, if no surrounding context can be provided.
*
* @return A list of candidate suggestions for the current editor content state, complying with the max count
* restrictions as best as possible. If the provider cannot provide any candidates at all, an empty list should be
* returned, in which case the UI automatically adapts and shows alternative actions.
*/
fun suggest(
subtypeId: Long,
flags: SuggestionRequestFlags,
word: String,
precedingWords: List<String>,
followingWords: List<String>,
): List<SuggestionCandidate>
/**
* Is called when a suggestion has been accepted, either manually by the user or automatically through auto-commit.
* This is purely a notification about an event and can safely be ignored if not needed.
*
* @param subtypeId The ID of the subtype this request is for. It is guaranteed to match one of the subtype IDs
* which have been passed to [preload].
* @param candidate The exact suggestion candidate which has been accepted.
*/
fun notifySuggestionAccepted(subtypeId: Long, candidate: SuggestionCandidate)
/**
* Is called when a previously automatically accepted suggestion has been reverted by the user with backspace. This
* is purely a notification about an event and can safely be ignored if not needed.
*
* @param subtypeId The ID of the subtype this request is for. It is guaranteed to match one of the subtype IDs
* which have been passed to [preload].
* @param candidate The exact suggestion candidate which has been reverted.
*/
fun notifySuggestionReverted(subtypeId: Long, candidate: SuggestionCandidate)
/**
* Is called if the user requests to prevent a certain suggested word from showing again. It is up to the actual
* implementation to adhere to this user request; this removal is neither enforced nor monitored by the NLP
* manager.
*
* @param subtypeId The ID of the subtype this request is for. It is guaranteed to match one of the subtype IDs
* which have been passed to [preload].
* @param candidate The exact suggestion candidate which the user does not want to see again.
*
* @return True if the removal request is supported and is accepted, false otherwise.
*/
fun removeSuggestion(subtypeId: Long, candidate: SuggestionCandidate): Boolean
}
/**
 * No-op NLP provider covering every provider variant ([SpellingProvider] and [SuggestionProvider]). Meant to be
 * used as a stand-in in case no other providers can be found.
 */
object FallbackNlpProvider : SpellingProvider, SuggestionProvider {
    /** Nothing to set up. */
    override fun create() = Unit

    /** Nothing to preload; [subtype] is ignored. */
    override fun preload(subtype: ComputedSubtype) = Unit

    /** Nothing to tear down. */
    override fun destroy() = Unit

    /** Performs no spell check and always answers with [SpellingResult.unspecified]. */
    override fun spell(
        subtypeId: Long,
        flags: SuggestionRequestFlags,
        word: String,
        precedingWords: List<String>,
        followingWords: List<String>,
    ): SpellingResult = SpellingResult.unspecified()

    /** Never produces candidates; always answers with an empty list. */
    override fun suggest(
        subtypeId: Long,
        flags: SuggestionRequestFlags,
        word: String,
        precedingWords: List<String>,
        followingWords: List<String>,
    ): List<SuggestionCandidate> = emptyList()

    /** The notification is ignored. */
    override fun notifySuggestionAccepted(subtypeId: Long, candidate: SuggestionCandidate) = Unit

    /** The notification is ignored. */
    override fun notifySuggestionReverted(subtypeId: Long, candidate: SuggestionCandidate) = Unit

    /** Removal is not supported, so the request is always declined with `false`. */
    override fun removeSuggestion(subtypeId: Long, candidate: SuggestionCandidate): Boolean = false
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Patrick Goldinger
* Copyright (C) 2023 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,8 +16,8 @@
package dev.patrickgold.florisboard.ime.nlp
import android.os.Build
import android.view.textservice.SuggestionsInfo
import dev.patrickgold.florisboard.lib.android.AndroidVersion
/**
* Inline value class wrapping the Android spelling [SuggestionsInfo] class with helpers.
@@ -54,7 +54,7 @@ value class SpellingResult(val suggestionsInfo: SuggestionsInfo) {
* provided corrections.
*/
val isGrammarError: Boolean
get() = AndroidVersion.ATLEAST_API31_S &&
get() = Build.VERSION.SDK_INT >= Build.VERSION_CODES.S &&
suggestionsInfo.suggestionsAttributes and SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_GRAMMAR_ERROR != 0
/**
@@ -114,7 +114,7 @@ value class SpellingResult(val suggestionsInfo: SuggestionsInfo) {
* caller in the service.
*/
fun grammarError(suggestions: Array<out String>, isHighConfidenceResult: Boolean = false): SpellingResult {
val attributes = if (AndroidVersion.ATLEAST_API31_S) {
val attributes = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_GRAMMAR_ERROR
} else {
SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Patrick Goldinger
* Copyright (C) 2023 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,10 +16,7 @@
package dev.patrickgold.florisboard.ime.nlp
import dev.patrickgold.florisboard.R
import dev.patrickgold.florisboard.ime.clipboard.provider.ClipboardItem
import dev.patrickgold.florisboard.ime.clipboard.provider.ItemType
import dev.patrickgold.florisboard.lib.util.NetworkUtils
import kotlinx.serialization.Serializable
/**
* Interface for a candidate item, which is returned by a suggestion provider and used by the UI logic to render
@@ -95,45 +92,14 @@ interface SuggestionCandidate {
*
* @see SuggestionCandidate
*/
@Serializable
data class WordSuggestionCandidate(
override val text: CharSequence,
override val secondaryText: CharSequence? = null,
override val confidence: Double = 0.0,
override val isEligibleForAutoCommit: Boolean = false,
override val isEligibleForUserRemoval: Boolean = true,
override val sourceProvider: SuggestionProvider? = null,
) : SuggestionCandidate {
override val iconId: Int? = null
}
/**
* Default implementation for a clipboard candidate. Should generally not be used by a suggestion provider, except by
* the clipboard suggestion provider.
*
* @see SuggestionCandidate
*/
data class ClipboardSuggestionCandidate(
val clipboardItem: ClipboardItem,
override val sourceProvider: SuggestionProvider?,
) : SuggestionCandidate {
override val text: CharSequence = clipboardItem.stringRepresentation()
override val secondaryText: CharSequence? = null
override val confidence: Double = 1.0
override val isEligibleForAutoCommit: Boolean = false
override val isEligibleForUserRemoval: Boolean = true
override val iconId: Int = when (clipboardItem.type) {
ItemType.TEXT -> when {
NetworkUtils.isEmailAddress(text) -> R.drawable.ic_email
NetworkUtils.isUrl(text) -> R.drawable.ic_link
NetworkUtils.isPhoneNumber(text) -> R.drawable.ic_phone
else -> R.drawable.ic_assignment
}
ItemType.IMAGE -> R.drawable.ic_image
ItemType.VIDEO -> R.drawable.ic_videocam
}
override var sourceProvider: SuggestionProvider? = null
}

View File

@@ -0,0 +1,129 @@
/*
* Copyright (C) 2023 Patrick Goldinger
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.patrickgold.florisboard.ime.nlp
import dev.patrickgold.florisboard.ime.input.InputShiftState
import kotlinx.serialization.KSerializer
import kotlinx.serialization.Serializable
import kotlinx.serialization.descriptors.PrimitiveKind
import kotlinx.serialization.descriptors.PrimitiveSerialDescriptor
import kotlinx.serialization.encoding.Decoder
import kotlinx.serialization.encoding.Encoder
/**
 * Class which allows to read 31-bit binary suggestion request flags. Note that the sign bit MUST always be 0, else
 * the behavior of this class is undefined.
 *
 * Layout of the binary flags:
 * ```
 * | Byte 3 | Byte 2 | Byte 1 | Byte 0 |
 * |--------|--------|--------|--------|
 * |0       |        |        |11111111| Maximum suggestion count (1-255), 0 indicating no limit.
 * |0       |        |    1111|        | Maximum ngram level (2-15). Values 0 and 1 cause word history to be ignored.
 * |0       |        |  11    |        | Input shift state (0-3) at the start of the current word.
 * |0       |        |11      |        | Input shift state (0-3) at the current cursor position.
 * |0       |       1|        |        | Flag indicating if possibly offensive words should be suggested.
 * |0       |      1 |        |        | Flag indicating if user-hidden words should still be displayed.
 * |0       |     1  |        |        | Flag indicating if the current request is within a private session.
 * |01111111|11111   |        |        | Reserved for future use.
 * ```
 *
 * Naming of the constants: `M_*` is the bit mask of a multi-bit field, `O_*` is the bit offset of that field
 * (derived from the mask) and `F_*` is a single-bit flag.
 *
 * Note: This class MUST be kept in sync with the C++ implementation:
 *  https://github.com/florisboard/nlp/blob/main/nlpcore/src/common/suggestion.cppm
 *
 * @property flags The raw packed flags value.
 */
@Serializable(with = SuggestionRequestFlags.Serializer::class)
@JvmInline
value class SuggestionRequestFlags(val flags: Int) {
    companion object {
        const val M_MAX_SUGGESTION_COUNT = 0x000000FF
        val O_MAX_SUGGESTION_COUNT = M_MAX_SUGGESTION_COUNT.countTrailingZeroBits()
        const val M_MAX_NGRAM_LEVEL = 0x00000F00
        val O_MAX_NGRAM_LEVEL = M_MAX_NGRAM_LEVEL.countTrailingZeroBits()
        const val M_INPUT_SHIFT_STATE_START = 0x00003000
        val O_INPUT_SHIFT_STATE_START = M_INPUT_SHIFT_STATE_START.countTrailingZeroBits()
        const val M_INPUT_SHIFT_STATE_CURRENT = 0x0000C000
        val O_INPUT_SHIFT_STATE_CURRENT = M_INPUT_SHIFT_STATE_CURRENT.countTrailingZeroBits()
        const val F_ALLOW_POSSIBLY_OFFENSIVE = 0x00010000
        const val F_OVERRIDE_HIDDEN_FLAG = 0x00020000
        const val F_IS_PRIVATE_SESSION = 0x00040000

        /**
         * Packs the given request parameters into a new [SuggestionRequestFlags] instance.
         *
         * Each numeric field is shifted to its offset and then masked to its field width, so a value which does
         * not fit into its field is silently truncated to the field's low bits instead of being rejected.
         *
         * @param maxSuggestionCount Maximum suggestion count, stored in 8 bits.
         * @param maxNgramLevel Maximum ngram level, stored in 4 bits.
         * @param issStart Input shift state at the start of the current word, stored in 2 bits.
         * @param issCurrent Input shift state at the current cursor position, stored in 2 bits.
         * @param allowPossiblyOffensive Whether possibly offensive words should be suggested.
         * @param overrideHiddenFlag Whether user-hidden words should still be displayed.
         * @param isPrivateSession Whether the request is within a private session.
         *
         * @return The packed flags.
         */
        fun new(
            maxSuggestionCount: Int,
            maxNgramLevel: Int,
            issStart: InputShiftState,
            issCurrent: InputShiftState,
            allowPossiblyOffensive: Boolean,
            overrideHiddenFlag: Boolean,
            isPrivateSession: Boolean,
        ): SuggestionRequestFlags {
            val flags = ((maxSuggestionCount shl O_MAX_SUGGESTION_COUNT) and M_MAX_SUGGESTION_COUNT) or
                ((maxNgramLevel shl O_MAX_NGRAM_LEVEL) and M_MAX_NGRAM_LEVEL) or
                ((issStart.toInt() shl O_INPUT_SHIFT_STATE_START) and M_INPUT_SHIFT_STATE_START) or
                ((issCurrent.toInt() shl O_INPUT_SHIFT_STATE_CURRENT) and M_INPUT_SHIFT_STATE_CURRENT) or
                (if (allowPossiblyOffensive) F_ALLOW_POSSIBLY_OFFENSIVE else 0) or
                (if (overrideHiddenFlag) F_OVERRIDE_HIDDEN_FLAG else 0) or
                (if (isPrivateSession) F_IS_PRIVATE_SESSION else 0)
            return SuggestionRequestFlags(flags)
        }
    }

    /** Returns the maximum suggestion count stored in the lowest 8 bits. */
    fun maxSuggestionCount(): Int {
        return (flags and M_MAX_SUGGESTION_COUNT) shr O_MAX_SUGGESTION_COUNT
    }

    /** Returns the maximum ngram level stored in the 4-bit field above the suggestion count. */
    fun maxNgramLevel(): Int {
        return (flags and M_MAX_NGRAM_LEVEL) shr O_MAX_NGRAM_LEVEL
    }

    /**
     * Returns the input shift state at the start of the current word.
     *
     * The decoded 2-bit value is used as an index into [InputShiftState.values].
     * NOTE(review): this assumes the enum's declaration order matches its encoded values - confirm.
     */
    fun inputShiftStateStart(): InputShiftState {
        return InputShiftState.values()[(flags and M_INPUT_SHIFT_STATE_START) shr O_INPUT_SHIFT_STATE_START]
    }

    /**
     * Returns the input shift state at the current cursor position.
     *
     * The decoded 2-bit value is used as an index into [InputShiftState.values].
     * NOTE(review): this assumes the enum's declaration order matches its encoded values - confirm.
     */
    fun inputShiftStateCurrent(): InputShiftState {
        return InputShiftState.values()[(flags and M_INPUT_SHIFT_STATE_CURRENT) shr O_INPUT_SHIFT_STATE_CURRENT]
    }

    /** Returns true if the [F_ALLOW_POSSIBLY_OFFENSIVE] bit is set. */
    fun allowPossiblyOffensive(): Boolean {
        return (flags and F_ALLOW_POSSIBLY_OFFENSIVE) != 0
    }

    /** Returns true if the [F_OVERRIDE_HIDDEN_FLAG] bit is set. */
    fun overrideHiddenFlag(): Boolean {
        return (flags and F_OVERRIDE_HIDDEN_FLAG) != 0
    }

    /** Returns true if the [F_IS_PRIVATE_SESSION] bit is set. */
    fun isPrivateSession(): Boolean {
        return (flags and F_IS_PRIVATE_SESSION) != 0
    }

    /** Returns a debug representation containing the raw flags in hexadecimal notation. */
    override fun toString(): String {
        return "SuggestionRequestFlags { flags = 0x${flags.toString(16)} }"
    }

    /** Returns the raw packed flags value. */
    fun toInt(): Int {
        return flags
    }

    /**
     * Serializer which encodes [SuggestionRequestFlags] as its raw integer value.
     */
    object Serializer : KSerializer<SuggestionRequestFlags> {
        // The kind must be INT because the value is written with encodeInt() and read with decodeInt(); a
        // descriptor kind that differs from the primitive actually encoded misreports the serial form.
        override val descriptor = PrimitiveSerialDescriptor("SuggestionRequestFlags", PrimitiveKind.INT)

        override fun serialize(encoder: Encoder, value: SuggestionRequestFlags) {
            encoder.encodeInt(value.toInt())
        }

        override fun deserialize(decoder: Decoder): SuggestionRequestFlags {
            return SuggestionRequestFlags(decoder.decodeInt())
        }
    }
}

View File

@@ -35,4 +35,5 @@ dependencyResolutionManagement {
include(":app")
include(":benchmark")
include(":ime-model")
include(":plugin")