文件
memely-alpha-stock-ranking/score_stocks.py
hao c579681989 Add AI scoring results: 121 stocks scored with GPT-4.1-mini
- MemelyAlphaStockRanking_scored.xlsx: Excel with AI scores (yellow highlighted)
- research_scores.json: Raw scoring data
- score_stocks.py: AI scoring script
- write_scores_to_excel.py: Excel writing script
- Updated README with methodology and top 10 rankings
2026-03-14 07:55:14 -04:00

197 行
6.8 KiB
Python

此文件含有模棱两可的 Unicode 字符
此文件含有可能会与其他字符混淆的 Unicode 字符。 如果您是想特意这样的,可以安全地忽略该警告。 使用 Escape 按钮显示他们。
#!/usr/bin/env python3
"""
Score all stocks in the MemelyAlpha ranking using AI model.
Processes in batches to handle rate limits and save progress.
"""
import pandas as pd
import json
import os
import time
from openai import OpenAI
# Single API client shared by every scoring request (constructed with the
# library defaults).
client = OpenAI()

EXCEL_PATH = "/home/ubuntu/upload/MemelyAlphaStockRanking副本副本.xlsx"
OUTPUT_JSON = "/home/ubuntu/memely-alpha-stock-ranking/research_scores.json"
PROGRESS_FILE = "/home/ubuntu/memely-alpha-stock-ranking/scoring_progress.json"


def _clean_symbol(raw):
    """Return a ticker cell as a stripped string, or None when the cell is empty.

    Normalising here keeps the keys of ``sector_map``, ``ytd_extra`` and
    ``seen`` comparable with each other and with the string keys stored in the
    JSON progress file.
    """
    if pd.isna(raw):
        return None
    symbol = str(raw).strip()
    return symbol or None


def _clean_text(raw, default='N/A'):
    """Return a cell as stripped text, substituting *default* for blank/NaN cells.

    A bare ``str()`` would turn an empty spreadsheet cell (NaN) into the
    literal text ``'nan'``, which would then be sent to the model as data.
    """
    if pd.isna(raw):
        return default
    text = str(raw).strip()
    return text or default


# Read the master list
df = pd.read_excel(EXCEL_PATH, sheet_name='stock master list')

# Also read Sheet4 for sector/theme info, keyed by normalised symbol
df4 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet4')
sector_map = {}
for _, row in df4.iterrows():
    sym = _clean_symbol(row.get('Symbol'))
    if sym is not None:
        sector_map[sym] = {
            'sector': row.get('Sector', ''),
            'theme': row.get('Theme', ''),
            'cap_category': row.get('Cap Category', ''),
        }

# Read Sheet6 for additional YTD data, keyed by normalised symbol
df6 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet6')
ytd_extra = {}
for _, row in df6.iterrows():
    sym = _clean_symbol(row.get('Symbol'))
    if sym is not None:
        ytd_extra[sym] = row.get('performance Year to Date', None)

# Build stock info for each symbol (first occurrence wins for duplicates)
stocks_info = []
seen = set()
for _, row in df.iterrows():
    sym = _clean_symbol(row['Symbol'])
    if sym is None or sym in seen:
        continue
    seen.add(sym)

    info = {
        'symbol': sym,
        'theme': _clean_text(row.get('Theme')),
        'perf_7d': row.get('performance past 7 days', None),
        'perf_ytd': row.get('performance Year to Date', None),
        'perf_specific': row.get('performance on specific dates', None),
        'first_call_kol': _clean_text(row.get('first call X kol')),
        'top_contributor': _clean_text(row.get('top contributor')),
    }

    # Add sector info from Sheet4. Blank cells are skipped so that consumers
    # using ``info.get('sector', <fallback>)`` really reach their fallback.
    sheet4_entry = sector_map.get(sym)
    if sheet4_entry is not None:
        for target_key, source_key in (('sector', 'sector'), ('theme_s4', 'theme')):
            value = _clean_text(sheet4_entry.get(source_key), default='')
            if value:
                info[target_key] = value

    # Add extra YTD from Sheet6 (only when a value is actually present)
    extra_ytd = ytd_extra.get(sym)
    if pd.notna(extra_ytd):
        info['ytd_extra'] = extra_ytd

    stocks_info.append(info)

print(f"Total unique stocks to score: {len(stocks_info)}")

# Load progress if exists. The file is written with ensure_ascii=False, so it
# is read back explicitly as UTF-8 rather than with the locale default.
scored = {}
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, 'r', encoding='utf-8') as f:
        scored = json.load(f)
    print(f"Loaded {len(scored)} previously scored stocks")
# Prompt sent for every batch; ``{stocks_text}`` is replaced with the
# per-stock descriptions built by ``_describe_stock``.
_PROMPT_TEMPLATE = """You are a professional stock/crypto analyst. Evaluate each of the following stocks/assets and provide a comprehensive score.
For EACH stock, provide:
1. **overall_score** (1-100): Overall investment attractiveness score
2. **momentum_score** (1-100): Based on recent price performance and momentum
3. **theme_score** (1-100): How strong/relevant is the thematic play (e.g., AI, Defense, Aerospace, Crypto, etc.)
4. **risk_score** (1-100): Risk level (100 = highest risk)
5. **social_buzz_score** (1-100): Social media attention and KOL backing strength
6. **brief_analysis**: 2-3 sentence analysis explaining the scores
Consider these factors:
- YTD performance indicates momentum strength
- Thematic relevance to current market trends (AI, Defense, Aerospace, Crypto, Quantum Computing are hot in 2025-2026)
- Small/micro cap stocks with strong themes get higher theme scores
- Stocks with notable KOL backing get higher social buzz scores
- Higher volatility = higher risk score
Here are the stocks to evaluate:
{stocks_text}
Return ONLY a valid JSON array with objects for each stock. Each object must have: symbol, overall_score, momentum_score, theme_score, risk_score, social_buzz_score, brief_analysis.
Do not include any text outside the JSON array."""

# String forms that a blank cell takes after str(): NaN, None and pandas.NA.
_MISSING_TEXT = ("", "nan", "none", "<na>")


def _format_pct(value):
    """Format a fractional return (0.05 -> '5.00%'); 'N/A' if missing or non-numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        # None, pandas.NA, or free text such as "n/a" typed into the sheet.
        return "N/A"
    return "N/A" if pd.isna(number) else f"{number:.2%}"


def _first_present(*candidates):
    """Return the first candidate that is real text, or 'N/A' when none is."""
    for candidate in candidates:
        text = str(candidate).strip()
        if text.lower() not in _MISSING_TEXT:
            return text
    return "N/A"


def _describe_stock(s):
    """Render one stock dict as the '---'-delimited text section of the prompt."""
    lines = [
        "",
        "---",
        f"Symbol: {s['symbol']}",
        # Sheet4 values take precedence; the master-list theme is the fallback.
        f"Sector: {_first_present(s.get('sector'), s.get('theme'))}",
        f"Theme: {_first_present(s.get('theme_s4'), s.get('theme'))}",
        f"Performance (Past 7 Days): {_format_pct(s.get('perf_7d'))}",
        f"Performance (Year to Date): {_format_pct(s.get('perf_ytd'))}",
        f"Performance (Specific Date): {_format_pct(s.get('perf_specific'))}",
        f"First Call KOL: {_first_present(s.get('first_call_kol'))}",
        f"Top Contributor: {_first_present(s.get('top_contributor'))}",
        "",
    ]
    return "\n".join(lines)


def _strip_code_fence(content):
    """Drop a leading Markdown code fence (optionally tagged 'json') from *content*."""
    if content.startswith("```"):
        content = content.split("```")[1]
        if content.startswith("json"):
            content = content[4:]
    return content


def score_batch(batch_stocks):
    """Score a batch of stocks using AI model.

    Args:
        batch_stocks: list of stock dicts. Each must contain ``'symbol'``; the
            optional keys read are ``sector``, ``theme``, ``theme_s4``,
            ``perf_7d``, ``perf_ytd``, ``perf_specific``, ``first_call_kol``
            and ``top_contributor``.

    Returns:
        A list with the parsed JSON objects returned by the model, ``[]`` for
        an empty batch (no request is made), or ``None`` when the request
        fails or the reply is not usable JSON, so the caller can retry.
    """
    if not batch_stocks:
        return []

    stocks_text = "".join(_describe_stock(s) for s in batch_stocks)
    prompt = _PROMPT_TEMPLATE.format(stocks_text=stocks_text)

    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "You are a professional financial analyst. Always respond with valid JSON only."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=8000
        )
        content = response.choices[0].message.content.strip()
        # Try to extract JSON from the response
        results = json.loads(_strip_code_fence(content))
    except Exception as e:
        # Deliberately broad: this is the boundary to the remote service, and
        # any failure (transport, empty reply, malformed JSON) is reported and
        # signalled with None instead of aborting the whole run.
        print(f"Error in batch scoring: {e}")
        return None

    # A one-stock batch may come back as a bare object instead of an array.
    if isinstance(results, dict) and 'symbol' in results:
        results = [results]
    if not isinstance(results, list):
        print(f"Error in batch scoring: expected a JSON array, got {type(results).__name__}")
        return None
    return results
# Process in batches of 15
batch_size = 15
remaining = [s for s in stocks_info if s['symbol'] not in scored]
print(f"Remaining to score: {len(remaining)}")
total_batches = (len(remaining) + batch_size - 1) // batch_size


def _save_scores(path):
    """Write the accumulated ``scored`` mapping to *path* as UTF-8 JSON."""
    # ensure_ascii=False emits raw non-ASCII text, so the encoding is pinned
    # instead of relying on the locale default.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(scored, f, indent=2, ensure_ascii=False)


def _record_results(results):
    """Store every usable entry of *results* in ``scored`` and echo its scores."""
    for r in results:
        if not isinstance(r, dict):
            # Skip stray strings/numbers rather than crashing on ``r.get``.
            continue
        sym = r.get('symbol', '')
        if sym:
            scored[sym] = r
            print(f"{sym}: Overall={r.get('overall_score')}, Momentum={r.get('momentum_score')}, Theme={r.get('theme_score')}")


for batch_num, start in enumerate(range(0, len(remaining), batch_size), start=1):
    batch = remaining[start:start + batch_size]
    symbols_in_batch = [s['symbol'] for s in batch]
    print(f"\nBatch {batch_num}/{total_batches}: Scoring {symbols_in_batch}")

    results = score_batch(batch)
    if not results:
        print(f" ✗ Batch {batch_num} failed, will retry...")
        # Retry once
        time.sleep(2)
        results = score_batch(batch)

    if results:
        _record_results(results)
        # Save progress after each batch so an interrupted run can resume
        _save_scores(PROGRESS_FILE)
        # Surface symbols the model left out; they are picked up on the next run.
        missing = [sym for sym in symbols_in_batch if sym not in scored]
        if missing:
            print(f" ! No score returned for: {missing}")
    else:
        print(f" ✗ Batch {batch_num} failed again, skipping")

    time.sleep(1)  # Rate limit

# Save final results
_save_scores(OUTPUT_JSON)
print("\n=== SCORING COMPLETE ===")
print(f"Total scored: {len(scored)}/{len(stocks_info)}")
print(f"Results saved to: {OUTPUT_JSON}")