/*
 * Nepali stemmer.
 *
 * Authors:
 * - Ingroj Shrestha, Nepali NLP Group
 * - Oleg Bartunov, Postgres Professional Ltd.
 * - Shreeya Singh Dhakal, Nepali NLP Group
 *
 * Overview: `stem` works backwards from the end of the word.  It first
 * makes a single attempt at removing a "category 1" suffix, then loops
 * removing "category 2" signs and "category 3" suffixes until nothing
 * more can be removed.
 */

routines (
    remove_category_1
    check_category_2
    remove_category_2
    remove_category_3
)

// Inside string literals, {NAME} inserts the stringdef called NAME.
stringescapes {}

// Signs written above the syllable.
stringdef DEVANAGARI_SIGN_CANDRABINDU hex '0901'
stringdef DEVANAGARI_SIGN_ANUSVARA    hex '0902'

// Independent vowel letters.
stringdef DEVANAGARI_LETTER_I         hex '0907'
stringdef DEVANAGARI_LETTER_II        hex '0908'
stringdef DEVANAGARI_LETTER_E         hex '090f'

// Consonant letters.
stringdef DEVANAGARI_LETTER_KA        hex '0915'
stringdef DEVANAGARI_LETTER_KHA       hex '0916'
stringdef DEVANAGARI_LETTER_GA        hex '0917'
stringdef DEVANAGARI_LETTER_CHA       hex '091b'
stringdef DEVANAGARI_LETTER_TA        hex '0924'
stringdef DEVANAGARI_LETTER_THA       hex '0925'
stringdef DEVANAGARI_LETTER_DA        hex '0926'
stringdef DEVANAGARI_LETTER_NA        hex '0928'
stringdef DEVANAGARI_LETTER_PA        hex '092a'
stringdef DEVANAGARI_LETTER_PHA       hex '092b'
stringdef DEVANAGARI_LETTER_BHA       hex '092d'
stringdef DEVANAGARI_LETTER_MA        hex '092e'
stringdef DEVANAGARI_LETTER_YA        hex '092f'
stringdef DEVANAGARI_LETTER_RA        hex '0930'
stringdef DEVANAGARI_LETTER_LA        hex '0932'
stringdef DEVANAGARI_LETTER_VA        hex '0935'
stringdef DEVANAGARI_LETTER_SA        hex '0938'
stringdef DEVANAGARI_LETTER_HA        hex '0939'

// Dependent vowel signs.
stringdef DEVANAGARI_VOWEL_SIGN_AA    hex '093e'
stringdef DEVANAGARI_VOWEL_SIGN_I     hex '093f'
stringdef DEVANAGARI_VOWEL_SIGN_II    hex '0940'
stringdef DEVANAGARI_VOWEL_SIGN_U     hex '0941'
stringdef DEVANAGARI_VOWEL_SIGN_UU    hex '0942'
stringdef DEVANAGARI_VOWEL_SIGN_E     hex '0947'
stringdef DEVANAGARI_VOWEL_SIGN_AI    hex '0948'
stringdef DEVANAGARI_VOWEL_SIGN_O     hex '094b'
stringdef DEVANAGARI_VOWEL_SIGN_AU    hex '094c'

// Virama.
stringdef DEVANAGARI_SIGN_VIRAMA      hex '094d'

externals ( stem )

backwardmode (

    // remove_category_1: find the longest listed suffix ending at the
    // cursor and mark it as the slice.
    //  - Suffixes in the first group are deleted unconditionally.
    //  - Suffixes in the second group (all beginning with LETTER_KA) are
    //    left in place when the text just before them is LETTER_E or
    //    VOWEL_SIGN_E; otherwise they are deleted.
    // Signals failure when no listed suffix matches.
    define remove_category_1 as (
        [substring] among (
            // Group 1: every string below shares the trailing (delete).
            '{DEVANAGARI_LETTER_MA}{DEVANAGARI_VOWEL_SIGN_AA}{DEVANAGARI_LETTER_RA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_PHA}{DEVANAGARI_LETTER_TA}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_VA}{DEVANAGARI_VOWEL_SIGN_AA}{DEVANAGARI_LETTER_RA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_CANDRABINDU}{DEVANAGARI_LETTER_GA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_ANUSVARA}{DEVANAGARI_LETTER_GA}'
            '{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_CANDRABINDU}{DEVANAGARI_LETTER_GA}'
            '{DEVANAGARI_LETTER_LA}{DEVANAGARI_VOWEL_SIGN_AA}{DEVANAGARI_LETTER_I}'
            '{DEVANAGARI_LETTER_LA}{DEVANAGARI_VOWEL_SIGN_AA}{DEVANAGARI_LETTER_II}'
            '{DEVANAGARI_LETTER_PA}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_I}'
            '{DEVANAGARI_LETTER_LA}{DEVANAGARI_VOWEL_SIGN_E}'
            '{DEVANAGARI_LETTER_RA}{DEVANAGARI_LETTER_TA}'
            '{DEVANAGARI_LETTER_MA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_LETTER_MA}{DEVANAGARI_VOWEL_SIGN_AA}'
                (delete)

            // Group 2: every string below shares the trailing command.
            '{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_I}'
            '{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AI}'
                // The inner group succeeds (and so suppresses the delete)
                // when the suffix is preceded by LETTER_E or VOWEL_SIGN_E;
                // the empty () after it is a no-op.
                (('{DEVANAGARI_LETTER_E}' or '{DEVANAGARI_VOWEL_SIGN_E}' ()) or delete)
        )
    )

    // check_category_2: pure test, nothing is deleted.  Succeeds when the
    // text ending at the cursor is CANDRABINDU, ANUSVARA or VOWEL_SIGN_AI.
    define check_category_2 as (
        [substring] among (
            '{DEVANAGARI_SIGN_CANDRABINDU}'
            '{DEVANAGARI_SIGN_ANUSVARA}'
            '{DEVANAGARI_VOWEL_SIGN_AI}'
        )
    )

    // remove_category_2: delete a trailing sign, but only when the text
    // immediately before it matches the required context.  Only the sign
    // itself (the among slice) is removed; the context stays.  Signals
    // failure when the sign or its context does not match.
    define remove_category_2 as (
        [substring] among (
            // CANDRABINDU / ANUSVARA: must follow YA+AU, CHA+AU, NA+AU
            // or THA+E.
            '{DEVANAGARI_SIGN_CANDRABINDU}'
            '{DEVANAGARI_SIGN_ANUSVARA}'
                (
                    '{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_AU}' or
                    '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_AU}' or
                    '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_AU}' or
                    '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_E}'
                    delete
                )

            // VOWEL_SIGN_AI: must follow TA + VIRAMA + RA.
            '{DEVANAGARI_VOWEL_SIGN_AI}'
                (
                    '{DEVANAGARI_LETTER_TA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_RA}'
                    delete
                )
        )
    )

    // remove_category_3: delete the longest listed suffix ending at the
    // cursor.  All strings share the single (delete) at the end of the
    // list.  Signals failure when no listed suffix matches.
    define remove_category_3 as (
        [substring] among (
            // Five-character suffixes.
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_HA}{DEVANAGARI_VOWEL_SIGN_U}{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_LETTER_HA}{DEVANAGARI_VOWEL_SIGN_U}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'

            // Four-character suffixes.
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KHA}{DEVANAGARI_VOWEL_SIGN_I}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KHA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_LETTER_LA}{DEVANAGARI_VOWEL_SIGN_AA}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_MA}{DEVANAGARI_VOWEL_SIGN_AA}{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_I}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_LETTER_HA}{DEVANAGARI_VOWEL_SIGN_O}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_U}'

            // Three-character suffixes.
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_KA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_U}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_U}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_NA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_LETTER_SA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_E}'
            '{DEVANAGARI_LETTER_PA}{DEVANAGARI_LETTER_RA}{DEVANAGARI_SIGN_VIRAMA}'
            '{DEVANAGARI_LETTER_BHA}{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_HA}{DEVANAGARI_LETTER_RA}{DEVANAGARI_VOWEL_SIGN_U}'
            '{DEVANAGARI_LETTER_HA}{DEVANAGARI_LETTER_RA}{DEVANAGARI_VOWEL_SIGN_UU}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}'

            // Two-character suffixes.
            '{DEVANAGARI_LETTER_I}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_VOWEL_SIGN_I}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_LETTER_E}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_VOWEL_SIGN_E}{DEVANAGARI_LETTER_CHA}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_U}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_E}'
            '{DEVANAGARI_LETTER_CHA}{DEVANAGARI_VOWEL_SIGN_AU}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_LETTER_THA}{DEVANAGARI_VOWEL_SIGN_E}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_AA}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_II}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_AI}'
            '{DEVANAGARI_LETTER_DA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_U}'
            '{DEVANAGARI_LETTER_NA}{DEVANAGARI_VOWEL_SIGN_E}'
            '{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_O}'
            '{DEVANAGARI_LETTER_YA}{DEVANAGARI_VOWEL_SIGN_AU}'

            // Single-character suffix.
            '{DEVANAGARI_LETTER_CHA}'
                (delete)
        )
    )
)

// stem: external entry point.  Processes the word from its end.
define stem as (
    backwards (
        // One attempt at a category-1 suffix; `do` ignores its failure.
        do remove_category_1

        do (
            // Keep stripping until an iteration removes nothing.
            // `and` / `or` bind left to right, so the body reads as
            //   (check_category_2 and remove_category_2 and remove_category_3)
            //       or remove_category_3
            // i.e. remove a category-2 sign when its check and context
            // both hold, then a category-3 suffix; if that chain fails at
            // any point, fall back to trying remove_category_3 alone.
            repeat (
                check_category_2 and remove_category_2 and remove_category_3
                    or remove_category_3
            )
        )
    )
)