{
  "id": "outlier",
  "type": "outlier_detection",
  "modelId": "outlier_detection_v1",
  "uidField": "id",
  "dataFormat": "solr",
  "trainingCollection": "searchhub",
  "fieldToVectorize": "body",
  "trainingDataFilterQuery": "*:*",
  "trainingDataSamplingPercentage": "1.0",
  "sourceFields": "",
  "analyzerConfig": "{ \"analyzers\": [ { \"name\": \"StdTokLowerStop\", \"charFilters\": [ { \"type\": \"htmlstrip\" } ], \"tokenizer\": { \"type\": \"letter\" }, \"filters\": [ { \"type\": \"lowercase\" }, { \"type\": \"length\", \"min\": \"3\", \"max\": \"32767\" }, { \"type\": \"stop\", \"ignoreCase\": \"true\", \"format\": \"snowball\", \"words\": \"org/apache/lucene/analysis/snowball/english_stop.txt\" }, { \"type\": \"englishminimalstem\" }] }], \"fields\": [{ \"regex\": \".+\", \"analyzer\": \"StdTokLowerStop\" } ]}",
  "outlierGroupIdField": "outlier_group_id",
  "outlierGroupLabelField": "outlier_group_label",
  "freqTermField": "freq_terms",
  "distToCenterField": "dist_to_center",
  "outputCollection": "outlier_output",
  "outputOutliersOnly": "false",
  "minDF": "5.0",
  "maxDF": "0.75",
  "numKeywordsPerLabel": "5",
  "randomSeed": "10",
  "outlierK": "10",
  "outlierThreshold": "0.01"
}