Create an analyzer for a language that isn’t supported natively

Ethan_Steininger · July 5, 2022, 2:08pm

Currently, there is no built-in support for Hebrew analyzers. This example shows how to create a language analyzer using a custom array of stopwords:

 {
  "analyzer": "lucene.standard",
  "mappings": {
    "dynamic": false,
    "fields": {
      "description": {
        "type": "string",
        "analyzer": "almostHebrew"
      }
    }
  },
  "analyzers": [
    {
      "name": "almostHebrew",
      "charFilters": [],
      "tokenizer": { "type": "standard" },
      "tokenFilters": [
        { "type": "icuFolding" },
        {
          "type": "stopword",
          "tokens": [
            "אני",
            "אתה",
            "אשר",
            "או"
          ]
        }
      ]
    }
  ]
}