Deliverables:
- MemelyAlphaStockRanking_scored.xlsx: Excel with AI scores (yellow highlighted)
- research_scores.json: Raw scoring data
- score_stocks.py: AI scoring script
- write_scores_to_excel.py: Excel writing script
- Updated README with methodology and top 10 rankings
197 lines
6.8 KiB
Python
197 lines
6.8 KiB
Python
#!/usr/bin/env python3
"""
Score all stocks in the MemelyAlpha ranking using AI model.
Processes in batches to handle rate limits and save progress.
"""

import json
import os
import time

import pandas as pd
from openai import OpenAI

# Client picks up OPENAI_API_KEY from the environment.
client = OpenAI()

# Source workbook (the "(副本)" segments are part of the uploaded filename).
EXCEL_PATH = "/home/ubuntu/upload/MemelyAlphaStockRanking(副本)(副本).xlsx"
# Final consolidated scores are written here at the end of the run.
OUTPUT_JSON = "/home/ubuntu/memely-alpha-stock-ranking/research_scores.json"
# Per-batch checkpoint so an interrupted run can resume where it stopped.
PROGRESS_FILE = "/home/ubuntu/memely-alpha-stock-ranking/scoring_progress.json"
# Read the master list of stocks.
df = pd.read_excel(EXCEL_PATH, sheet_name='stock master list')

# Sheet4 carries per-symbol sector / theme / cap-category metadata.
df4 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet4')
sector_map = {}
for _, s4_row in df4.iterrows():
    s4_sym = s4_row.get('Symbol')
    if pd.notna(s4_sym):
        sector_map[s4_sym] = {
            'sector': s4_row.get('Sector', ''),
            'theme': s4_row.get('Theme', ''),
            'cap_category': s4_row.get('Cap Category', ''),
        }

# Sheet6 carries an additional year-to-date performance column.
df6 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet6')
ytd_extra = {}
for _, s6_row in df6.iterrows():
    s6_sym = s6_row.get('Symbol')
    if pd.notna(s6_sym):
        ytd_extra[s6_sym] = s6_row.get('performance Year to Date', None)
# Build one info record per unique symbol in the master list.
stocks_info = []
seen = set()
for _, master_row in df.iterrows():
    symbol = master_row['Symbol']
    if pd.isna(symbol) or symbol in seen:
        continue  # skip blank rows and duplicate symbols
    seen.add(symbol)

    record = {
        'symbol': symbol,
        'theme': str(master_row.get('Theme', '')),
        'perf_7d': master_row.get('performance past 7 days', None),
        'perf_ytd': master_row.get('performance Year to Date', None),
        'perf_specific': master_row.get('performance on specific dates', None),
        'first_call_kol': str(master_row.get('first call X kol', '')),
        'top_contributor': str(master_row.get('top contributor', '')),
    }

    # Enrich with Sheet4 sector/theme when that sheet knows the symbol.
    s4_info = sector_map.get(symbol)
    if s4_info is not None:
        record['sector'] = s4_info.get('sector', '')
        record['theme_s4'] = s4_info.get('theme', '')

    # Enrich with the Sheet6 YTD figure when present and non-NaN.
    if symbol in ytd_extra and pd.notna(ytd_extra[symbol]):
        record['ytd_extra'] = ytd_extra[symbol]

    stocks_info.append(record)

print(f"Total unique stocks to score: {len(stocks_info)}")
# Resume support: reload any scores checkpointed by a previous run.
scored = {}
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE) as f:
        scored = json.load(f)
    print(f"Loaded {len(scored)} previously scored stocks")
def score_batch(batch_stocks):
    """Score a batch of stocks using AI model.

    Builds a single prompt describing every stock in *batch_stocks*,
    asks the chat model for per-stock scores, and parses the JSON
    array the model returns.

    Args:
        batch_stocks: list of stock-info dicts (see ``stocks_info``);
            each must carry 'symbol' and may carry performance,
            sector/theme and KOL fields.

    Returns:
        list of score dicts (symbol, overall_score, momentum_score,
        theme_score, risk_score, social_buzz_score, brief_analysis)
        on success, or None when the API call or JSON parsing fails
        (the caller retries a failed batch once).
    """

    def _fmt_pct(value):
        # Robust percent formatting. The original code formatted the raw
        # cell with ':.2%' outside any try-block, so a text value in a
        # performance column crashed the whole run. Treat anything
        # missing or non-numeric as "N/A" instead.
        if value is None or pd.isna(value):
            return "N/A"
        try:
            return f"{float(value):.2%}"
        except (TypeError, ValueError):
            return "N/A"

    stocks_text = ""
    for s in batch_stocks:
        perf_7d = _fmt_pct(s.get('perf_7d'))
        perf_ytd = _fmt_pct(s.get('perf_ytd'))
        perf_spec = _fmt_pct(s.get('perf_specific'))
        # Prefer Sheet4 metadata; fall back to the master-list theme.
        sector = s.get('sector', s.get('theme', 'N/A'))
        theme = s.get('theme_s4', s.get('theme', 'N/A'))

        stocks_text += f"""
---
Symbol: {s['symbol']}
Sector: {sector}
Theme: {theme}
Performance (Past 7 Days): {perf_7d}
Performance (Year to Date): {perf_ytd}
Performance (Specific Date): {perf_spec}
First Call KOL: {s.get('first_call_kol', 'N/A')}
Top Contributor: {s.get('top_contributor', 'N/A')}
"""

    prompt = f"""You are a professional stock/crypto analyst. Evaluate each of the following stocks/assets and provide a comprehensive score.

For EACH stock, provide:
1. **overall_score** (1-100): Overall investment attractiveness score
2. **momentum_score** (1-100): Based on recent price performance and momentum
3. **theme_score** (1-100): How strong/relevant is the thematic play (e.g., AI, Defense, Aerospace, Crypto, etc.)
4. **risk_score** (1-100): Risk level (100 = highest risk)
5. **social_buzz_score** (1-100): Social media attention and KOL backing strength
6. **brief_analysis**: 2-3 sentence analysis explaining the scores

Consider these factors:
- YTD performance indicates momentum strength
- Thematic relevance to current market trends (AI, Defense, Aerospace, Crypto, Quantum Computing are hot in 2025-2026)
- Small/micro cap stocks with strong themes get higher theme scores
- Stocks with notable KOL backing get higher social buzz scores
- Higher volatility = higher risk score

Here are the stocks to evaluate:
{stocks_text}

Return ONLY a valid JSON array with objects for each stock. Each object must have: symbol, overall_score, momentum_score, theme_score, risk_score, social_buzz_score, brief_analysis.
Do not include any text outside the JSON array."""

    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "You are a professional financial analyst. Always respond with valid JSON only."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=8000
        )

        content = response.choices[0].message.content.strip()
        # Models sometimes wrap the reply in a ``` fence despite the
        # instructions; peel the fence (and a leading "json" tag) off
        # before parsing.
        if content.startswith("```"):
            content = content.split("```")[1]
            if content.startswith("json"):
                content = content[4:]

        return json.loads(content)
    except Exception as e:
        # Deliberate best-effort: the caller retries a failed batch once,
        # so report and return None rather than aborting the run.
        print(f"Error in batch scoring: {e}")
        return None
# Process in batches of 15 symbols per API call, checkpointing after
# each batch so an interrupted run can resume from PROGRESS_FILE.
batch_size = 15
remaining = [s for s in stocks_info if s['symbol'] not in scored]
print(f"Remaining to score: {len(remaining)}")

total_batches = (len(remaining) + batch_size - 1) // batch_size


def _record_and_checkpoint(results, verbose=True):
    """Merge one batch's score dicts into `scored` and checkpoint to disk.

    Previously this merge/save logic was duplicated between the first
    attempt and the retry path; keep it in one place.
    """
    for r in results:
        sym = r.get('symbol', '')
        if sym:
            scored[sym] = r
            if verbose:
                print(f"  ✓ {sym}: Overall={r.get('overall_score')}, Momentum={r.get('momentum_score')}, Theme={r.get('theme_score')}")
    with open(PROGRESS_FILE, 'w') as f:
        json.dump(scored, f, indent=2, ensure_ascii=False)


for i in range(0, len(remaining), batch_size):
    batch = remaining[i:i+batch_size]
    batch_num = i // batch_size + 1
    symbols_in_batch = [s['symbol'] for s in batch]
    print(f"\nBatch {batch_num}/{total_batches}: Scoring {symbols_in_batch}")

    results = score_batch(batch)

    if results:
        _record_and_checkpoint(results)
    else:
        print(f"  ✗ Batch {batch_num} failed, will retry...")
        time.sleep(2)
        # Single retry; on success, merge quietly (matches original).
        results = score_batch(batch)
        if results:
            _record_and_checkpoint(results, verbose=False)
        else:
            # Previously a second failure was silent and the symbols were
            # simply dropped; at least say which ones were skipped.
            print(f"  ✗ Batch {batch_num} failed again; skipped: {symbols_in_batch}")

    time.sleep(1)  # crude rate limiting between API calls
# Write the complete score set to the final output location
# (the per-batch checkpoint file is left in place as well).
with open(OUTPUT_JSON, 'w') as f:
    json.dump(scored, f, indent=2, ensure_ascii=False)

print("\n=== SCORING COMPLETE ===")
print(f"Total scored: {len(scored)}/{len(stocks_info)}")
print(f"Results saved to: {OUTPUT_JSON}")