Add AI scoring results: 121 stocks scored with GPT-4.1-mini
- MemelyAlphaStockRanking_scored.xlsx: Excel with AI scores (yellow highlighted)
- research_scores.json: Raw scoring data
- score_stocks.py: AI scoring script
- write_scores_to_excel.py: Excel writing script
- Updated README with methodology and top 10 rankings
这个提交包含在:
196
score_stocks.py
普通文件
196
score_stocks.py
普通文件
@@ -0,0 +1,196 @@
|
||||
#!/usr/bin/env python3
"""
Score all stocks in the MemelyAlpha ranking using AI model.
Processes in batches to handle rate limits and save progress.
"""

import pandas as pd
import json
import os
import time
from openai import OpenAI

# OpenAI client; presumably configured via the OPENAI_API_KEY environment
# variable (SDK default) — TODO confirm deployment environment.
client = OpenAI()

# Source workbook containing the stock master list and metadata sheets.
EXCEL_PATH = "/home/ubuntu/upload/MemelyAlphaStockRanking(副本)(副本).xlsx"
# Final output: all AI scores keyed by symbol.
OUTPUT_JSON = "/home/ubuntu/memely-alpha-stock-ranking/research_scores.json"
# Checkpoint file rewritten after every batch so an interrupted run can resume.
PROGRESS_FILE = "/home/ubuntu/memely-alpha-stock-ranking/scoring_progress.json"
|
||||
|
||||
# Master list: one row per ranked stock.
df = pd.read_excel(EXCEL_PATH, sheet_name='stock master list')

# Sheet4 supplies sector / theme / cap-category metadata, keyed by symbol.
df4 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet4')
sector_map = {
    record.get('Symbol'): {
        'sector': record.get('Sector', ''),
        'theme': record.get('Theme', ''),
        'cap_category': record.get('Cap Category', ''),
    }
    for _, record in df4.iterrows()
    if pd.notna(record.get('Symbol'))
}

# Sheet6 carries an additional year-to-date performance column.
df6 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet6')
ytd_extra = {
    record.get('Symbol'): record.get('performance Year to Date', None)
    for _, record in df6.iterrows()
    if pd.notna(record.get('Symbol'))
}
|
||||
|
||||
# Flatten the three sheets into one info dict per unique symbol.
stocks_info = []
processed = set()
for _, master_row in df.iterrows():
    ticker = master_row['Symbol']
    # Skip blank rows and duplicate symbols (first occurrence wins).
    if pd.isna(ticker):
        continue
    if ticker in processed:
        continue
    processed.add(ticker)

    entry = {
        'symbol': ticker,
        'theme': str(master_row.get('Theme', '')),
        'perf_7d': master_row.get('performance past 7 days', None),
        'perf_ytd': master_row.get('performance Year to Date', None),
        'perf_specific': master_row.get('performance on specific dates', None),
        'first_call_kol': str(master_row.get('first call X kol', '')),
        'top_contributor': str(master_row.get('top contributor', '')),
    }

    # Enrich with Sheet4 sector/theme metadata when available.
    meta = sector_map.get(ticker)
    if meta is not None:
        entry['sector'] = meta.get('sector', '')
        entry['theme_s4'] = meta.get('theme', '')

    # Enrich with Sheet6's alternative YTD figure when present and non-NaN.
    if ticker in ytd_extra and pd.notna(ytd_extra[ticker]):
        entry['ytd_extra'] = ytd_extra[ticker]

    stocks_info.append(entry)

print(f"Total unique stocks to score: {len(stocks_info)}")
|
||||
|
||||
# Resume support: pull in any scores saved by an earlier, interrupted run.
scored = {}
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, 'r') as progress_fh:
        scored = json.load(progress_fh)
    print(f"Loaded {len(scored)} previously scored stocks")
|
||||
|
||||
def _extract_json_array(content):
    """Extract and parse a JSON array from a raw model response.

    Strips a surrounding markdown code fence (``` or ```json) when present.
    If the remaining text still fails to parse, falls back to the substring
    between the first '[' and the last ']' — the model occasionally wraps
    the array in prose despite the instructions.

    Raises json.JSONDecodeError (or IndexError on a malformed fence) when no
    parseable array can be found; score_batch's try/except converts that to
    a None return.
    """
    if content.startswith("```"):
        content = content.split("```")[1]
        if content.startswith("json"):
            content = content[4:]
        content = content.strip()
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start = content.find('[')
        end = content.rfind(']')
        if start == -1 or end <= start:
            raise
        return json.loads(content[start:end + 1])


def score_batch(batch_stocks):
    """Score a batch of stocks using AI model.

    Args:
        batch_stocks: list of per-stock dicts as built in the module body
            (keys: 'symbol', 'theme', 'perf_7d', 'perf_ytd', 'perf_specific',
            'first_call_kol', 'top_contributor'; optional 'sector',
            'theme_s4', 'ytd_extra').

    Returns:
        The list of score dicts parsed from the model's JSON response, or
        None on any API/parse failure — the caller retries once on None.
    """

    def pct(value):
        # Percent-format a numeric performance value; "N/A" for missing/NaN.
        return f"{value:.2%}" if pd.notna(value) else "N/A"

    # Build the per-stock prompt section with join (avoids quadratic +=).
    chunks = []
    for s in batch_stocks:
        sector = s.get('sector', s.get('theme', 'N/A'))
        theme = s.get('theme_s4', s.get('theme', 'N/A'))
        chunks.append(f"""
---
Symbol: {s['symbol']}
Sector: {sector}
Theme: {theme}
Performance (Past 7 Days): {pct(s.get('perf_7d'))}
Performance (Year to Date): {pct(s.get('perf_ytd'))}
Performance (Specific Date): {pct(s.get('perf_specific'))}
First Call KOL: {s.get('first_call_kol', 'N/A')}
Top Contributor: {s.get('top_contributor', 'N/A')}
""")
    stocks_text = "".join(chunks)

    prompt = f"""You are a professional stock/crypto analyst. Evaluate each of the following stocks/assets and provide a comprehensive score.

For EACH stock, provide:
1. **overall_score** (1-100): Overall investment attractiveness score
2. **momentum_score** (1-100): Based on recent price performance and momentum
3. **theme_score** (1-100): How strong/relevant is the thematic play (e.g., AI, Defense, Aerospace, Crypto, etc.)
4. **risk_score** (1-100): Risk level (100 = highest risk)
5. **social_buzz_score** (1-100): Social media attention and KOL backing strength
6. **brief_analysis**: 2-3 sentence analysis explaining the scores

Consider these factors:
- YTD performance indicates momentum strength
- Thematic relevance to current market trends (AI, Defense, Aerospace, Crypto, Quantum Computing are hot in 2025-2026)
- Small/micro cap stocks with strong themes get higher theme scores
- Stocks with notable KOL backing get higher social buzz scores
- Higher volatility = higher risk score

Here are the stocks to evaluate:
{stocks_text}

Return ONLY a valid JSON array with objects for each stock. Each object must have: symbol, overall_score, momentum_score, theme_score, risk_score, social_buzz_score, brief_analysis.
Do not include any text outside the JSON array."""

    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "You are a professional financial analyst. Always respond with valid JSON only."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=8000
        )
        content = response.choices[0].message.content.strip()
        return _extract_json_array(content)
    except Exception as e:
        # Broad catch by design: a failed batch returns None and the
        # caller retries once; anything more granular buys nothing here.
        print(f"Error in batch scoring: {e}")
        return None
|
||||
|
||||
# Process in batches of 15 symbols per API call; skip already-scored symbols.
batch_size = 15
remaining = [s for s in stocks_info if s['symbol'] not in scored]
print(f"Remaining to score: {len(remaining)}")

total_batches = (len(remaining) + batch_size - 1) // batch_size


def _record_results(results):
    """Merge one batch of score dicts into `scored` and persist progress."""
    for r in results:
        sym = r.get('symbol', '')
        if sym:
            scored[sym] = r
            print(f" ✓ {sym}: Overall={r.get('overall_score')}, Momentum={r.get('momentum_score')}, Theme={r.get('theme_score')}")
    # Checkpoint after every batch so an interrupted run can resume.
    with open(PROGRESS_FILE, 'w') as f:
        json.dump(scored, f, indent=2, ensure_ascii=False)


for i in range(0, len(remaining), batch_size):
    batch = remaining[i:i + batch_size]
    batch_num = i // batch_size + 1
    symbols_in_batch = [s['symbol'] for s in batch]
    print(f"\nBatch {batch_num}/{total_batches}: Scoring {symbols_in_batch}")

    results = score_batch(batch)
    if not results:
        # One retry after a short back-off; score_batch returns None on failure.
        print(f" ✗ Batch {batch_num} failed, will retry...")
        time.sleep(2)
        results = score_batch(batch)

    if results:
        _record_results(results)
    else:
        # Both attempts failed; these symbols stay unscored until the next run.
        print(f" ✗ Batch {batch_num} failed again; symbols skipped this run.")

    time.sleep(1)  # Rate limit
|
||||
|
||||
# Persist the complete score set for downstream consumers (Excel writer).
with open(OUTPUT_JSON, 'w') as f:
    json.dump(scored, f, indent=2, ensure_ascii=False)

# Plain string here — the original used an f-string with no placeholders (F541).
print("\n=== SCORING COMPLETE ===")
print(f"Total scored: {len(scored)}/{len(stocks_info)}")
print(f"Results saved to: {OUTPUT_JSON}")
|
||||
在新工单中引用
屏蔽一个用户