import re import pickle import numpy as np import gradio as gr from nltk.stem.porter import PorterStemmer from sklearn.feature_extraction.text import CountVectorizer # File Paths model_path = 'loj_reg_twitter_sentiment.sav' bow_vectorizer_path = "bow_vectorizer.sav" # Loading the files model = pickle.load(open(model_path, 'rb')) bow_vectorizer = pickle.load(open(bow_vectorizer_path, 'rb')) stemmer = PorterStemmer() labels = ["negative", "positive"]#classes[target].values() Examples = [ "Very bad, worst", "perfect, very good", "I just had the best meal at my favorite restaurant. The food was delicious and the service was fantastic!", " I'm so disappointed with the customer service I received from this company. They were unhelpful and rude, and I won't be using their services again." ] # Load the model def text_preprocessing(input_txt, pattern:str="@[\w]*"): # Finding all the texts which fits the pattern r = re.findall(pattern, input_txt) # removing this words for word in r: input_txt = re.sub(word, "", input_txt) # removing special characters input_txt = input_txt.replace("[^a-zA-Z#]", " ") # standart lowercase input_txt = str.lower(input_txt) # tokenization tokens = input_txt.split() # stemming for standardization tokens = [stemmer.stem(word) for word in tokens] # concatenating the words sentence = " ".join(tokens) return sentence def vectorizer(sentence): return bow_vectorizer.transform(sentence) def predict(text): # preparing the input into convenient form sentence = text_preprocessing(text) # vectorizing the data features = vectorizer([sentence]) # prediction probabilities = model.predict_proba(features) #.predict(features) probs = probabilities.flatten() # output form results = {l : np.round(p, 3) for l, p in zip(labels, probs)} return results # GUI Component demo = gr.Interface(predict, "text", "label", examples = Examples) # Launching the demo if __name__ == "__main__": demo.launch()