diff --git a/contextualized_topic_models/utils/preprocessing.py b/contextualized_topic_models/utils/preprocessing.py
index ea7a9d5..7aa43b3 100644
--- a/contextualized_topic_models/utils/preprocessing.py
+++ b/contextualized_topic_models/utils/preprocessing.py
@@ -3,6 +3,9 @@
 from nltk.corpus import stopwords as stop_words
 from gensim.utils import deaccent
 import warnings
+from konlpy.tag import Okt  # for Korean natural language processing.
+okt = Okt()
+
 
 class WhiteSpacePreprocessing():
     """
@@ -11,7 +14,6 @@ class WhiteSpacePreprocessing():
 
     def __init__(self, documents, stopwords_language="english", vocabulary_size=2000):
         """
-
         :param documents: list of strings
         :param stopwords_language: string of the language of the stopwords (see nltk stopwords)
         :param vocabulary_size: the number of most frequent words to include in the documents. Infrequent words will be discarded from the list of preprocessed documents
@@ -28,7 +30,6 @@ def preprocess(self):
         """
         Note that if after filtering some documents do not contain words we remove them. That is why we return also the
         list of unpreprocessed documents.
-
         :return: preprocessed documents, unpreprocessed documents and the vocabulary list
         """
         preprocessed_docs_tmp = self.documents
@@ -64,7 +65,6 @@ class WhiteSpacePreprocessingStopwords():
     def __init__(self, documents, stopwords_list=None, vocabulary_size=2000, max_df=1.0, min_words=1,
                  remove_numbers=True):
         """
-
         :param documents: list of strings
         :param stopwords_list: list of the stopwords to remove
         :param vocabulary_size: the number of most frequent words to include in the documents. Infrequent words will be discarded from the list of preprocessed documents
@@ -94,13 +94,13 @@ def preprocess(self):
         """
         Note that if after filtering some documents do not contain words we remove them. That is why we return also the
         list of unpreprocessed documents.
-
         :return: preprocessed documents, unpreprocessed documents and the vocabulary list
         """
         preprocessed_docs_tmp = self.documents
         preprocessed_docs_tmp = [deaccent(doc.lower()) for doc in preprocessed_docs_tmp]
         preprocessed_docs_tmp = [doc.translate(
             str.maketrans(string.punctuation, ' ' * len(string.punctuation))) for doc in preprocessed_docs_tmp]
+
         if self.remove_numbers:
             preprocessed_docs_tmp = [doc.translate(str.maketrans("0123456789", ' ' * len("0123456789")))
                                      for doc in preprocessed_docs_tmp]
@@ -124,4 +124,76 @@ def preprocess(self):
 
         return preprocessed_docs, unpreprocessed_docs, vocabulary
 
+class WhiteSpacePreprocessingStopwordsKorean():
+    """
+    Provides a very simple preprocessing script that filters infrequent tokens from text
+    """
+
+    def __init__(self, documents, stopwords_list=None, vocabulary_size=2000, max_df=1.0, min_words=1,
+                 remove_numbers=True):
+        """
+        :param documents: list of strings
+        :param stopwords_list: list of the stopwords to remove
+        :param vocabulary_size: the number of most frequent words to include in the documents. Infrequent words will be discarded from the list of preprocessed documents
+        :param max_df: float or int, default=1.0
+            When building the vocabulary ignore terms that have a document
+            frequency strictly higher than the given threshold (corpus-specific
+            stop words).
+            If float in range [0.0, 1.0], the parameter represents a proportion of
+            documents, integer absolute counts.
+            This parameter is ignored if vocabulary is not None.
+        :param min_words: int, default=1. Documents with less words than the parameter
+            will be removed
+        :param remove_numbers: bool, default=True. If true, numbers are removed from docs
+        """
+        self.documents = documents
+        if stopwords_list is not None:
+            self.stopwords = set(stopwords_list)
+        else:
+            self.stopwords = []
+        self.vocabulary_size = vocabulary_size
+        self.max_df = max_df
+        self.min_words = min_words
+        self.remove_numbers = remove_numbers
+
+    def preprocess(self):
+        """
+        Note that if after filtering some documents do not contain words we remove them. That is why we return also the
+        list of unpreprocessed documents.
+        :return: preprocessed documents, unpreprocessed documents and the vocabulary list
+
+        Note that for Korean language support, konlpy's Okt tokenizer is used to extract nouns.
+        """
+        preprocessed_docs_tmp = self.documents
+        preprocessed_docs_tmp = [deaccent(doc.lower()) for doc in preprocessed_docs_tmp]
+
+        korean_tmp = []
+        for doc in preprocessed_docs_tmp:
+            tmp = okt.nouns(doc)
+            sent = ''
+            for t in tmp:
+                sent = sent + str(t) + ' '
+            korean_tmp.append(sent)
+
+        preprocessed_docs_tmp = korean_tmp
+
+        preprocessed_docs_tmp = [' '.join([w for w in doc.split() if len(w) > 0 and w not in self.stopwords])
+                                 for doc in preprocessed_docs_tmp]
+
+        vectorizer = CountVectorizer(max_features=self.vocabulary_size, max_df=self.max_df)
+        vectorizer.fit_transform(preprocessed_docs_tmp)
+        temp_vocabulary = set(vectorizer.get_feature_names())
+
+        preprocessed_docs_tmp = [' '.join([w for w in doc.split() if w in temp_vocabulary])
+                                 for doc in preprocessed_docs_tmp]
+
+        preprocessed_docs, unpreprocessed_docs = [], []
+        for i, doc in enumerate(preprocessed_docs_tmp):
+            if len(doc) > 0 and len(doc) >= self.min_words:
+                preprocessed_docs.append(doc)
+                unpreprocessed_docs.append(self.documents[i])
+
+        vocabulary = list(set([item for doc in preprocessed_docs for item in doc.split()]))
+
+        return preprocessed_docs, unpreprocessed_docs, vocabulary
 
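
For reference, a minimal usage sketch of the new class (not part of the patch): the sample documents, stopword list, and variable names below are illustrative only, and konlpy needs a working Java runtime (JPype1) for Okt to load. Note also that because the patch instantiates Okt() at module import time, importing preprocessing.py now requires konlpy even when the Korean class is not used.

# Usage sketch, assuming the patch above is applied and konlpy is installed.
from contextualized_topic_models.utils.preprocessing import WhiteSpacePreprocessingStopwordsKorean

# Hypothetical Korean sample documents, for illustration only.
documents = [
    "자연어 처리는 정말 재미있는 분야입니다.",
    "토픽 모델은 문서 집합에서 주제를 찾아냅니다.",
]

# The stopword and vocabulary size are arbitrary example values.
sp = WhiteSpacePreprocessingStopwordsKorean(documents, stopwords_list=["분야"], vocabulary_size=2000)
preprocessed_docs, unpreprocessed_docs, vocabulary = sp.preprocess()
print(vocabulary)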