import sys
from threading import Thread

import streamlit as st
import torch
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from streamlit import runtime
from streamlit.web import cli as stcli
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# FastAPI app
api_app = FastAPI()
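
# Architecture note: one process serves two apps. Streamlit renders the UI
# (default port 8501) while this FastAPI app exposes a JSON endpoint on
# port 8502 from a background thread started at the bottom of the file.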

# Load the model and tokenizer
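# Notes:
# - @st.cache_resource keeps a single tokenizer/model pair per process.
# - use_fast=False skips the fast-tokenizer conversion for DeBERTa-v3,
#   which can fail without extra dependencies (e.g. sentencepiece/protobuf).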
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
    model = AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-753")
    model.eval()
    return tokenizer, model

# Prediction function
def predict_news(text, tokenizer, model):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_label = torch.argmax(probabilities, dim=-1).item()
    confidence = probabilities[0][predicted_label].item()
    return "FAKE" if predicted_label == 1 else "REAL", confidence

# FastAPI request model
class NewsInput(BaseModel):
    text: str

# FastAPI route for POST requests
@api_app.post("/classify")
async def classify_news(data: NewsInput):
    tokenizer, model = load_model()
    prediction, confidence = predict_news(data.text, tokenizer, model)
    return {
        "prediction": prediction,
        "confidence": f"{confidence*100:.2f}%"
    }
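
# Note: load_model() is cached with st.cache_resource; calling it from the
# API thread (outside a Streamlit script run) still works, though Streamlit
# may log a "missing ScriptRunContext" warning.
# Example request against the background API (port 8502, started below):
#   curl -X POST http://localhost:8502/classify \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Breaking: markets tumble worldwide"}'
# Response shape (values illustrative): {"prediction": "FAKE", "confidence": "87.42%"}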

# Streamlit UI
def run_streamlit():
    st.title("News Classifier")

    # Load model (cached across reruns)
    tokenizer, model = load_model()

    # Text input
    news_text = st.text_area("Enter news text to analyze:", height=200)

    if st.button("Classify"):
        if news_text:
            with st.spinner('Analyzing...'):
                prediction, confidence = predict_news(news_text, tokenizer, model)

                # Display results
                if prediction == "FAKE":
                    st.error(f"⚠️ {prediction} NEWS")
                else:
                    st.success(f"✅ {prediction} NEWS")

                st.info(f"Confidence: {confidence*100:.2f}%")

# Run the FastAPI server (blocking call; executed inside a daemon thread)
def start_fastapi():
    uvicorn.run(api_app, host="0.0.0.0", port=8502)

# Start the API thread exactly once per process: @st.cache_resource survives
# Streamlit's script reruns, so the thread is not spawned again on each rerun.
@st.cache_resource
def start_fastapi_thread():
    thread = Thread(target=start_fastapi, daemon=True)
    thread.start()
    return thread

if __name__ == "__main__":
    if runtime.exists():
        # Executed by `streamlit run`: start the API thread and render the UI.
        start_fastapi_thread()
        run_streamlit()
    else:
        # Plain `python app.py`: re-launch this same file under Streamlit.
        sys.argv = ["streamlit", "run", __file__]
        sys.exit(stcli.main())
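
# Either entry point works:
#   streamlit run app.py   # UI on :8501, API on :8502
#   python app.py          # re-executes itself under `streamlit run`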

# from fastapi import FastAPI, HTTPException
# from pydantic import BaseModel
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# import torch

# from fastapi.middleware.cors import CORSMiddleware


# # Define the FastAPI app
# app = FastAPI()

# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],  # Update with your frontend's URL for security
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )
# # Define the input data schema
# class InputText(BaseModel):
#     text: str

# # Load the model and tokenizer (ensure these paths are correct in your Space)
# tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small', use_fast=False)
# model = AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-753")
# model.eval()

# # Prediction function
# def predict_news(text: str):
#     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
#     with torch.no_grad():
#         outputs = model(**inputs)
#     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
#     predicted_label = torch.argmax(probabilities, dim=-1).item()
#     confidence = probabilities[0][predicted_label].item()
#     return {
#         "prediction": "FAKE" if predicted_label == 1 else "REAL",
#         "confidence": round(confidence * 100, 2)  # Return confidence as a percentage
#     }

# # Define the POST endpoint
# @app.post("/predict")
# async def classify_news(input_text: InputText):
#     try:
#         result = predict_news(input_text.text)
#         return result
#     except Exception as e:
#         raise HTTPException(status_code=500, detail=str(e))
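
# # To serve this standalone variant on its own (assuming it is saved as app.py,
# # so that `app` below is importable as app:app):
# #   uvicorn app:app --host 0.0.0.0 --port 8000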