Add AI scoring results: 121 stocks scored with GPT-4.1-mini
- MemelyAlphaStockRanking_scored.xlsx: Excel with AI scores (yellow highlighted)
- research_scores.json: Raw scoring data
- score_stocks.py: AI scoring script
- write_scores_to_excel.py: Excel writing script
- Updated README with methodology and top 10 rankings
这个提交包含在:
196
score_stocks.py
普通文件
196
score_stocks.py
普通文件
@@ -0,0 +1,196 @@
|
||||
#!/usr/bin/env python3
"""
Score all stocks in the MemelyAlpha ranking using AI model.
Processes in batches to handle rate limits and save progress.
"""

import pandas as pd
import json
import os
import time
from openai import OpenAI

# OpenAI client; presumably configured via the OPENAI_API_KEY environment
# variable (SDK default) — TODO confirm deployment environment.
client = OpenAI()

# Source workbook containing the stock master list and metadata sheets.
EXCEL_PATH = "/home/ubuntu/upload/MemelyAlphaStockRanking(副本)(副本).xlsx"
# Final output: all AI scores keyed by symbol.
OUTPUT_JSON = "/home/ubuntu/memely-alpha-stock-ranking/research_scores.json"
# Checkpoint file rewritten after every batch so an interrupted run can resume.
PROGRESS_FILE = "/home/ubuntu/memely-alpha-stock-ranking/scoring_progress.json"
|
||||
|
||||
# Master list: one row per ranked stock.
df = pd.read_excel(EXCEL_PATH, sheet_name='stock master list')

# Sheet4 supplies sector / theme / cap-category metadata, keyed by symbol.
df4 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet4')
sector_map = {
    record.get('Symbol'): {
        'sector': record.get('Sector', ''),
        'theme': record.get('Theme', ''),
        'cap_category': record.get('Cap Category', ''),
    }
    for _, record in df4.iterrows()
    if pd.notna(record.get('Symbol'))
}

# Sheet6 carries an additional year-to-date performance column.
df6 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet6')
ytd_extra = {
    record.get('Symbol'): record.get('performance Year to Date', None)
    for _, record in df6.iterrows()
    if pd.notna(record.get('Symbol'))
}
|
||||
|
||||
# Flatten the three sheets into one info dict per unique symbol.
stocks_info = []
processed = set()
for _, master_row in df.iterrows():
    ticker = master_row['Symbol']
    # Skip blank rows and duplicate symbols (first occurrence wins).
    if pd.isna(ticker):
        continue
    if ticker in processed:
        continue
    processed.add(ticker)

    entry = {
        'symbol': ticker,
        'theme': str(master_row.get('Theme', '')),
        'perf_7d': master_row.get('performance past 7 days', None),
        'perf_ytd': master_row.get('performance Year to Date', None),
        'perf_specific': master_row.get('performance on specific dates', None),
        'first_call_kol': str(master_row.get('first call X kol', '')),
        'top_contributor': str(master_row.get('top contributor', '')),
    }

    # Enrich with Sheet4 sector/theme metadata when available.
    meta = sector_map.get(ticker)
    if meta is not None:
        entry['sector'] = meta.get('sector', '')
        entry['theme_s4'] = meta.get('theme', '')

    # Enrich with Sheet6's alternative YTD figure when present and non-NaN.
    if ticker in ytd_extra and pd.notna(ytd_extra[ticker]):
        entry['ytd_extra'] = ytd_extra[ticker]

    stocks_info.append(entry)

print(f"Total unique stocks to score: {len(stocks_info)}")
|
||||
|
||||
# Resume support: pull in any scores saved by an earlier, interrupted run.
scored = {}
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, 'r') as progress_fh:
        scored = json.load(progress_fh)
    print(f"Loaded {len(scored)} previously scored stocks")
|
||||
|
||||
def _extract_json_array(content):
    """Extract and parse a JSON array from a raw model response.

    Strips a surrounding markdown code fence (``` or ```json) when present.
    If the remaining text still fails to parse, falls back to the substring
    between the first '[' and the last ']' — the model occasionally wraps
    the array in prose despite the instructions.

    Raises json.JSONDecodeError (or IndexError on a malformed fence) when no
    parseable array can be found; score_batch's try/except converts that to
    a None return.
    """
    if content.startswith("```"):
        content = content.split("```")[1]
        if content.startswith("json"):
            content = content[4:]
        content = content.strip()
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start = content.find('[')
        end = content.rfind(']')
        if start == -1 or end <= start:
            raise
        return json.loads(content[start:end + 1])


def score_batch(batch_stocks):
    """Score a batch of stocks using AI model.

    Args:
        batch_stocks: list of per-stock dicts as built in the module body
            (keys: 'symbol', 'theme', 'perf_7d', 'perf_ytd', 'perf_specific',
            'first_call_kol', 'top_contributor'; optional 'sector',
            'theme_s4', 'ytd_extra').

    Returns:
        The list of score dicts parsed from the model's JSON response, or
        None on any API/parse failure — the caller retries once on None.
    """

    def pct(value):
        # Percent-format a numeric performance value; "N/A" for missing/NaN.
        return f"{value:.2%}" if pd.notna(value) else "N/A"

    # Build the per-stock prompt section with join (avoids quadratic +=).
    chunks = []
    for s in batch_stocks:
        sector = s.get('sector', s.get('theme', 'N/A'))
        theme = s.get('theme_s4', s.get('theme', 'N/A'))
        chunks.append(f"""
---
Symbol: {s['symbol']}
Sector: {sector}
Theme: {theme}
Performance (Past 7 Days): {pct(s.get('perf_7d'))}
Performance (Year to Date): {pct(s.get('perf_ytd'))}
Performance (Specific Date): {pct(s.get('perf_specific'))}
First Call KOL: {s.get('first_call_kol', 'N/A')}
Top Contributor: {s.get('top_contributor', 'N/A')}
""")
    stocks_text = "".join(chunks)

    prompt = f"""You are a professional stock/crypto analyst. Evaluate each of the following stocks/assets and provide a comprehensive score.

For EACH stock, provide:
1. **overall_score** (1-100): Overall investment attractiveness score
2. **momentum_score** (1-100): Based on recent price performance and momentum
3. **theme_score** (1-100): How strong/relevant is the thematic play (e.g., AI, Defense, Aerospace, Crypto, etc.)
4. **risk_score** (1-100): Risk level (100 = highest risk)
5. **social_buzz_score** (1-100): Social media attention and KOL backing strength
6. **brief_analysis**: 2-3 sentence analysis explaining the scores

Consider these factors:
- YTD performance indicates momentum strength
- Thematic relevance to current market trends (AI, Defense, Aerospace, Crypto, Quantum Computing are hot in 2025-2026)
- Small/micro cap stocks with strong themes get higher theme scores
- Stocks with notable KOL backing get higher social buzz scores
- Higher volatility = higher risk score

Here are the stocks to evaluate:
{stocks_text}

Return ONLY a valid JSON array with objects for each stock. Each object must have: symbol, overall_score, momentum_score, theme_score, risk_score, social_buzz_score, brief_analysis.
Do not include any text outside the JSON array."""

    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "You are a professional financial analyst. Always respond with valid JSON only."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=8000
        )
        content = response.choices[0].message.content.strip()
        return _extract_json_array(content)
    except Exception as e:
        # Broad catch by design: a failed batch returns None and the
        # caller retries once; anything more granular buys nothing here.
        print(f"Error in batch scoring: {e}")
        return None
|
||||
|
||||
# Process in batches of 15 symbols per API call; skip already-scored symbols.
batch_size = 15
remaining = [s for s in stocks_info if s['symbol'] not in scored]
print(f"Remaining to score: {len(remaining)}")

total_batches = (len(remaining) + batch_size - 1) // batch_size


def _record_results(results):
    """Merge one batch of score dicts into `scored` and persist progress."""
    for r in results:
        sym = r.get('symbol', '')
        if sym:
            scored[sym] = r
            print(f" ✓ {sym}: Overall={r.get('overall_score')}, Momentum={r.get('momentum_score')}, Theme={r.get('theme_score')}")
    # Checkpoint after every batch so an interrupted run can resume.
    with open(PROGRESS_FILE, 'w') as f:
        json.dump(scored, f, indent=2, ensure_ascii=False)


for i in range(0, len(remaining), batch_size):
    batch = remaining[i:i + batch_size]
    batch_num = i // batch_size + 1
    symbols_in_batch = [s['symbol'] for s in batch]
    print(f"\nBatch {batch_num}/{total_batches}: Scoring {symbols_in_batch}")

    results = score_batch(batch)
    if not results:
        # One retry after a short back-off; score_batch returns None on failure.
        print(f" ✗ Batch {batch_num} failed, will retry...")
        time.sleep(2)
        results = score_batch(batch)

    if results:
        _record_results(results)
    else:
        # Both attempts failed; these symbols stay unscored until the next run.
        print(f" ✗ Batch {batch_num} failed again; symbols skipped this run.")

    time.sleep(1)  # Rate limit
|
||||
|
||||
# Persist the complete score set for downstream consumers (Excel writer).
with open(OUTPUT_JSON, 'w') as f:
    json.dump(scored, f, indent=2, ensure_ascii=False)

# Plain string here — the original used an f-string with no placeholders (F541).
print("\n=== SCORING COMPLETE ===")
print(f"Total scored: {len(scored)}/{len(stocks_info)}")
print(f"Results saved to: {OUTPUT_JSON}")
|
||||
在新工单中引用
屏蔽一个用户