{
  "id": "outlier",
  "type": "outlier_detection",
  "modelId": "outlier_detection_v1",
  "uidField": "id",
  "dataFormat": "solr",
  "trainingCollection": "searchhub",
  "fieldToVectorize": "body",
  "trainingDataFilterQuery": "*:*",
  "trainingDataSamplingPercentage": "1.0",
  "sourceFields": "",
  "analyzerConfig": "{
         \"analyzers\": [ { \"name\": \"StdTokLowerStop\",
               \"charFilters\": [ { \"type\": \"htmlstrip\" } ],
               \"tokenizer\": { \"type\": \"letter\" },
               \"filters\": [ 
                     { \"type\": \"lowercase\" },  
                     { \"type\": \"length\", \"min\": \"3\", \"max\": \"32767\" },
                     { \"type\": \"stop\", \"ignoreCase\": \"true\", \"format\": \"snowball\", \"words\": \"org/apache/lucene/analysis/snowball/english_stop.txt\" },
                     { \"type\": \"englishminimalstem\" }] }],
         \"fields\": [{ \"regex\": \".+\", \"analyzer\": \"StdTokLowerStop\" } ]}",
  "outlierGroupIdField": "outlier_group_id",
  "outlierGroupLabelField": "outlier_group_label",
  "freqTermField": "freq_terms",
  "distToCenterField": "dist_to_center",
  "outputCollection": "outlier_output",
  "outputOutliersOnly": "false",
  "minDF": "5.0",
  "maxDF": "0.75",
  "numKeywordsPerLabel": "5",
  "randomSeed": "10",
  "outlierK": "10",
  "outlierThreshold": "0.01"
}