-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment.py
229 lines (183 loc) · 7.38 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# sentiment.py
import requests
from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import plotly.graph_objects as go
import datetime
from typing import List, Tuple, Optional
# Ensure the VADER lexicon is available before the analyzer is constructed.
# NOTE: this call runs at import time, so merely importing this module may
# trigger an nltk download attempt.
nltk.download('vader_lexicon')
# Module-level VADER analyzer; perform_sentiment_analysis() reads it as `sia`.
sia = SentimentIntensityAnalyzer()
def fetch_company_name(ticker: str) -> Optional[str]:
    """
    Fetches the company name for a given stock ticker from NSE India.

    Args:
        ticker (str): Stock ticker symbol (e.g., 'RELIANCE').

    Returns:
        Optional[str]: Stripped text of the ``<span id="securityName">``
        element if it is present in the page, else None. Any error raised
        while requesting or parsing the page is printed and None is returned.
    """
    url = "https://www.nseindia.com/get-quotes/equity"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
        # NOTE(review): advertising 'br' assumes a Brotli decoder is installed
        # alongside requests/urllib3 -- confirm for the deployment environment.
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }
    try:
        # The context manager releases the session's pooled connections even
        # when the request fails.
        with requests.Session() as session:
            session.headers.update(headers)
            # Passing the ticker through `params` percent-encodes it, so
            # symbols containing reserved characters (e.g. 'M&M') stay a
            # single query value instead of splitting the query string.
            response = session.get(url, params={"symbol": ticker}, timeout=10)
            response.raise_for_status()
            page_markup = response.content

        soup = BeautifulSoup(page_markup, "html.parser")
        # Find company name in the HTML structure
        company_name_tag = soup.find("span", {"id": "securityName"})
        if company_name_tag:
            return company_name_tag.text.strip()
        return None
    except Exception as e:
        # Best-effort lookup: report the problem and let the caller fall back
        # to the ticker alone.
        print(f"Error fetching company name: {e}")
        return None
def _append_matching_titles(anchors, keywords: List[str], titles: List[str], limit: int) -> None:
    """Append to `titles` the unseen anchor texts mentioning any keyword, up to `limit` entries."""
    seen = set(titles)
    for anchor in anchors:
        if len(titles) >= limit:
            break
        title = anchor.get_text(strip=True)
        if title in seen:
            continue
        lowered_title = title.lower()
        if any(keyword in lowered_title for keyword in keywords):
            titles.append(title)
            seen.add(title)


def fetch_news_headlines(ticker: str, company_name: Optional[str], limit: int = 15) -> List[str]:
    """
    Fetches the latest news headlines for a given stock ticker from MoneyControl.

    Headlines are taken first from ``<a class="news-link">`` elements; if that
    yields fewer than `limit` matches, every ``<a href=...>`` on the page is
    scanned as a fallback. A headline is kept when it contains the ticker or
    the company name (case-insensitive), and each distinct headline is
    returned at most once.

    Args:
        ticker (str): Stock ticker symbol (e.g., 'RELIANCE').
        company_name (Optional[str]): Company name corresponding to the ticker.
            When falsy, only the ticker is used as a keyword.
        limit (int): Maximum number of headlines to return.

    Returns:
        List[str]: A list of relevant news headlines (at most `limit`), or an
        empty list if the request or parsing fails (the error is printed).
    """
    # Local import keeps this block self-contained; only this function needs it.
    from urllib.parse import quote

    # Percent-encode the ticker so symbols with reserved characters
    # (e.g. 'M&M') still form a valid URL path segment.
    news_url = f"https://www.moneycontrol.com/news/tags/{quote(ticker.lower(), safe='')}.html"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept-Language": "en-US,en;q=0.9",
    }
    try:
        response = requests.get(news_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Lowercase the keywords once instead of once per anchor.
        keywords = [ticker.lower()]
        if company_name:
            keywords.append(company_name.lower())

        news_data: List[str] = []
        # Find news articles based on the current structure of MoneyControl
        _append_matching_titles(
            soup.find_all('a', class_='news-link'), keywords, news_data, limit
        )
        # Fallback if not enough headlines found
        if len(news_data) < limit:
            _append_matching_titles(
                soup.find_all('a', href=True), keywords, news_data, limit
            )
        return news_data
    except Exception as e:
        # Best-effort scrape: report the problem and return no headlines.
        print(f"Error fetching news headlines: {e}")
        return []
def perform_sentiment_analysis(headlines: List[str]) -> pd.DataFrame:
    """
    Performs sentiment analysis on a list of news headlines.

    Each headline is scored with the module-level VADER analyzer (`sia`) and
    its 'compound' polarity score is recorded.

    Args:
        headlines (List[str]): List of news headlines.

    Returns:
        pd.DataFrame: DataFrame with one row per headline and the columns
        'headline' and 'score'. For an empty input the frame has zero rows
        but still carries both columns.
    """
    sentiment_data = [
        {'headline': headline, 'score': sia.polarity_scores(headline)['compound']}
        for headline in headlines
    ]
    # Pinning the columns keeps the schema stable for an empty list, so
    # downstream code can index df['score'] without a KeyError.
    return pd.DataFrame(sentiment_data, columns=['headline', 'score'])
def train_sentiment_model(sentiment_df: pd.DataFrame) -> Tuple[Optional[LinearRegression], Optional[float]]:
    """
    Fits a Linear Regression of sentiment score against observation order.

    The single feature, 'day', is simply the 1-based position of each row in
    `sentiment_df`; the target is the 'score' column.

    Args:
        sentiment_df (pd.DataFrame): DataFrame containing a 'score' column.

    Returns:
        Tuple[Optional[LinearRegression], Optional[float]]: The fitted model
        and its R² on the training rows, or (None, None) when the frame is
        empty or has fewer than 5 rows.
    """
    row_count = len(sentiment_df)
    if row_count < 5 or sentiment_df.empty:
        print("Not enough data to train the sentiment model.")
        return None, None

    # assign() works on a copy, so the caller's frame is left untouched.
    features = sentiment_df.assign(day=range(1, row_count + 1))[['day']]
    target = sentiment_df['score']

    fitted = LinearRegression().fit(features, target)
    fit_quality = r2_score(target, fitted.predict(features))
    return fitted, fit_quality
def predict_future_sentiment(model: LinearRegression, last_day: int, days_ahead: int = 3) -> pd.DataFrame:
    """
    Predicts future sentiment scores using the trained model.

    The model is queried at 'day' values last_day + 1 .. last_day + days_ahead,
    and each prediction is labelled with a calendar date counted forward from
    today.

    Args:
        model (LinearRegression): Trained Linear Regression model whose single
            feature is named 'day'.
        last_day (int): The last day number from the training data.
        days_ahead (int): Number of days to predict. Values <= 0 produce an
            empty result instead of calling the model.

    Returns:
        pd.DataFrame: DataFrame with the columns 'date' and 'predicted_score',
        one row per predicted day.
    """
    if days_ahead <= 0:
        # Nothing to predict; skip model.predict(), which would otherwise be
        # handed a zero-row frame.
        return pd.DataFrame(columns=['date', 'predicted_score'])

    offsets = range(1, days_ahead + 1)
    future_days = pd.DataFrame({'day': [last_day + offset for offset in offsets]})
    predictions = model.predict(future_days)

    # Assuming today's date as the last date, calculate future dates
    last_date = datetime.date.today()
    return pd.DataFrame({
        'date': [last_date + datetime.timedelta(days=offset) for offset in offsets],
        'predicted_score': predictions,
    })
def plot_sentiment(sentiment_df: pd.DataFrame, prediction_df: pd.DataFrame) -> go.Figure:
    """
    Plots historical and predicted sentiment scores on one shared axis.

    Both traces use the observation number as x: historical scores occupy
    positions 1..n (n = number of rows in `sentiment_df`) and predictions
    continue at n+1, n+2, ... This mirrors the 1-based 'day' feature the
    regression is trained on. The calendar dates of the predictions are
    attached as hover text.

    Args:
        sentiment_df (pd.DataFrame): DataFrame with historical sentiment
            scores in a 'score' column. If the column is missing, no
            historical trace is drawn.
        prediction_df (pd.DataFrame): DataFrame with 'date' and
            'predicted_score' columns. May be empty.

    Returns:
        go.Figure: Plotly figure object containing the sentiment plot.
    """
    fig = go.Figure()
    history_len = len(sentiment_df)

    # Historical Sentiment
    if 'score' in sentiment_df.columns:
        fig.add_trace(go.Scatter(
            # Positional numbering does not rely on the frame having a
            # default 0..n-1 index.
            x=list(range(1, history_len + 1)),
            y=sentiment_df['score'],
            mode='lines+markers',
            name='Historical Sentiment',
            line=dict(color='blue'),
        ))

    # Predicted Sentiment
    if not prediction_df.empty:
        first_position = history_len + 1
        fig.add_trace(go.Scatter(
            # Same numeric axis as the history; mixing these integers with
            # calendar dates on one axis would not line the traces up.
            x=list(range(first_position, first_position + len(prediction_df))),
            y=prediction_df['predicted_score'],
            text=[str(date_value) for date_value in prediction_df['date']],
            mode='lines+markers',
            name='Predicted Sentiment',
            line=dict(color='orange', dash='dash'),
        ))

    fig.update_layout(
        title="Sentiment Analysis & Prediction",
        # The x values are observation numbers, not dates.
        xaxis_title="Observation #",
        yaxis_title="Sentiment Score",
        template="plotly_dark",
    )
    return fig