import type {_FeaturePreprocessingParams} from './feature-preprocessing-params';

/**
 * Generated from com.dataiku.dip.analysis.model.preprocessing.TextFeaturePreprocessingParams
 *
 * Type-only description of the preprocessing settings for a text feature. It adds the
 * fields below to whatever `_FeaturePreprocessingParams` declares; every field declared
 * here is required (none is marked optional).
 *
 * NOTE(review): the header above indicates this declaration is generated from a Java class,
 * so manual edits are presumably overwritten on regeneration — confirm before hand-editing.
 */
export interface TextFeaturePreprocessingParams extends _FeaturePreprocessingParams {
    // Declared as a single string, not an array; the expected format/delimiter is not visible here.
    customStopWords: string;
    // NOTE(review): presumably only meaningful together with `useCustomVectorizer` — verify against the consumer.
    customVectorizerCode: string;
    hashSVDSVDComponents: number;
    hashSVDSVDLimit: number;
    hashSize: number;
    maxRowsRatio: number;
    maxSequenceLength: number;
    maxWords: number;
    minRowsRatio: number;
    ngramMaxSize: number;
    ngramMinSize: number;
    sentenceEmbeddingBatchSize: number;
    sentenceEmbeddingModel: string;
    // The two enum types below come from the namespace of the same name declared later in this file.
    stopWordsMode: TextFeaturePreprocessingParams.StopWordsMode;
    // NOTE(review): the only snake_case field in this interface; presumably it mirrors the
    // serialized name on the Java side — do not rename without confirming.
    text_handling: TextFeaturePreprocessingParams.TextHandlingMethod;
    useCustomVectorizer: boolean;
}

/**
 * Namespace merged with the `TextFeaturePreprocessingParams` interface declared above.
 * It holds the two enums that type the interface's `stopWordsMode` and `text_handling` fields.
 */
export namespace TextFeaturePreprocessingParams {
    /**
     * Generated from com.dataiku.dip.analysis.model.preprocessing.TextFeaturePreprocessingParams$StopWordsMode
     *
     * String enum: every member's runtime value is identical to its name.
     * `NONE` and `CUSTOM` are declared first; the remaining members are language names
     * listed in alphabetical order.
     */
    export enum StopWordsMode {
        NONE = 'NONE',
        CUSTOM = 'CUSTOM',
        AFRIKAANS = 'AFRIKAANS',
        ALBANIAN = 'ALBANIAN',
        ARABIC = 'ARABIC',
        ARMENIAN = 'ARMENIAN',
        BASQUE = 'BASQUE',
        BENGALI = 'BENGALI',
        BULGARIAN = 'BULGARIAN',
        CATALAN = 'CATALAN',
        CHINESE = 'CHINESE',
        CHINESE_TRADITIONAL = 'CHINESE_TRADITIONAL',
        CROATIAN = 'CROATIAN',
        CZECH = 'CZECH',
        DANISH = 'DANISH',
        DUTCH = 'DUTCH',
        ENGLISH = 'ENGLISH',
        // NOTE(review): the `_2021` members are distinct values from their unsuffixed
        // counterparts; what differs between them is not visible here — confirm on the Java side.
        ENGLISH_2021 = 'ENGLISH_2021',
        ESTONIAN = 'ESTONIAN',
        FINNISH = 'FINNISH',
        FRENCH = 'FRENCH',
        FRENCH_2021 = 'FRENCH_2021',
        GERMAN = 'GERMAN',
        GREEK = 'GREEK',
        GUJARATI = 'GUJARATI',
        HEBREW = 'HEBREW',
        HINDI = 'HINDI',
        HUNGARIAN = 'HUNGARIAN',
        ICELANDIC = 'ICELANDIC',
        INDONESIAN = 'INDONESIAN',
        IRISH = 'IRISH',
        ITALIAN = 'ITALIAN',
        JAPANESE = 'JAPANESE',
        KANNADA = 'KANNADA',
        KOREAN = 'KOREAN',
        LATVIAN = 'LATVIAN',
        LITHUANIAN = 'LITHUANIAN',
        LUXEMBOURGISH = 'LUXEMBOURGISH',
        MACEDONIAN = 'MACEDONIAN',
        MALAYALAM = 'MALAYALAM',
        MARATHI = 'MARATHI',
        NEPALI = 'NEPALI',
        NORWEGIAN = 'NORWEGIAN',
        PERSIAN = 'PERSIAN',
        POLISH = 'POLISH',
        PORTUGUESE = 'PORTUGUESE',
        ROMANIAN = 'ROMANIAN',
        RUSSIAN = 'RUSSIAN',
        SANSKRIT = 'SANSKRIT',
        SERBIAN = 'SERBIAN',
        SINHALA = 'SINHALA',
        SLOVAK = 'SLOVAK',
        SLOVENIAN = 'SLOVENIAN',
        SPANISH = 'SPANISH',
        SWEDISH = 'SWEDISH',
        TAGALOG = 'TAGALOG',
        TAMIL = 'TAMIL',
        TATAR = 'TATAR',
        TELUGU = 'TELUGU',
        THAI = 'THAI',
        TURKISH = 'TURKISH',
        UKRAINIAN = 'UKRAINIAN',
        URDU = 'URDU',
        VIETNAMESE = 'VIETNAMESE',
        YORUBA = 'YORUBA'
    }

    /**
     * Generated from com.dataiku.dip.analysis.model.preprocessing.TextFeaturePreprocessingParams$TextHandlingMethod
     *
     * String enum: every member's runtime value is identical to its name.
     */
    export enum TextHandlingMethod {
        TOKENIZE_HASHING = 'TOKENIZE_HASHING',
        TOKENIZE_HASHING_SVD = 'TOKENIZE_HASHING_SVD',
        TOKENIZE_COUNTS = 'TOKENIZE_COUNTS',
        TOKENIZE_TFIDF = 'TOKENIZE_TFIDF',
        SENTENCE_EMBEDDING = 'SENTENCE_EMBEDDING',
        // Shares the string value 'CUSTOM' with StopWordsMode.CUSTOM, but the two are
        // separate enum types and are not assignable to each other.
        CUSTOM = 'CUSTOM'
    }
}