#!/usr/bin/env python3
"""Score all stocks in the MemelyAlpha ranking using an AI model.

Reads the master stock list (plus sector/theme metadata from Sheet4 and
extra YTD performance from Sheet6) out of an Excel workbook, asks an
OpenAI chat model to score each stock, and writes all scores to JSON.

Stocks are scored in batches to stay inside rate limits, and progress is
checkpointed to PROGRESS_FILE after every batch so an interrupted run
can resume where it left off.
"""
import json
import os
import time

import pandas as pd
from openai import OpenAI

client = OpenAI()

EXCEL_PATH = "/home/ubuntu/upload/MemelyAlphaStockRanking(副本)(副本).xlsx"
OUTPUT_JSON = "/home/ubuntu/memely-alpha-stock-ranking/research_scores.json"
PROGRESS_FILE = "/home/ubuntu/memely-alpha-stock-ranking/scoring_progress.json"

BATCH_SIZE = 15  # stocks sent to the model per request


def _sheet_symbol_map(frame, value_builder):
    """Build ``{symbol: value_builder(row)}`` from a sheet.

    Rows whose ``Symbol`` cell is missing/NaN are skipped.
    """
    out = {}
    for _, row in frame.iterrows():
        sym = row.get('Symbol')
        if pd.notna(sym):
            out[sym] = value_builder(row)
    return out


def load_stocks_info():
    """Read the workbook and return a list of per-stock info dicts.

    Combines the 'stock master list' sheet with sector/theme data from
    Sheet4 and an extra YTD performance column from Sheet6.  Duplicate
    symbols in the master list keep only their first occurrence.
    """
    df = pd.read_excel(EXCEL_PATH, sheet_name='stock master list')

    # Sector / theme / cap-category metadata (Sheet4).
    df4 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet4')
    sector_map = _sheet_symbol_map(df4, lambda row: {
        'sector': row.get('Sector', ''),
        'theme': row.get('Theme', ''),
        'cap_category': row.get('Cap Category', ''),
    })

    # Additional YTD performance figures (Sheet6).
    df6 = pd.read_excel(EXCEL_PATH, sheet_name='Sheet6')
    ytd_extra = _sheet_symbol_map(
        df6, lambda row: row.get('performance Year to Date', None))

    stocks_info = []
    seen = set()
    for _, row in df.iterrows():
        sym = row['Symbol']
        if pd.isna(sym) or sym in seen:
            continue  # skip blank rows and duplicate symbols
        seen.add(sym)
        info = {
            'symbol': sym,
            'theme': str(row.get('Theme', '')),
            'perf_7d': row.get('performance past 7 days', None),
            'perf_ytd': row.get('performance Year to Date', None),
            'perf_specific': row.get('performance on specific dates', None),
            'first_call_kol': str(row.get('first call X kol', '')),
            'top_contributor': str(row.get('top contributor', '')),
        }
        # Enrich with Sheet4 sector info when available.
        if sym in sector_map:
            info['sector'] = sector_map[sym].get('sector', '')
            info['theme_s4'] = sector_map[sym].get('theme', '')
        # Extra YTD from Sheet6, only when it is a real (non-NaN) value.
        if sym in ytd_extra and pd.notna(ytd_extra[sym]):
            info['ytd_extra'] = ytd_extra[sym]
        stocks_info.append(info)
    return stocks_info


def _fmt_pct(value):
    """Format a fractional performance value as a percentage, or 'N/A'."""
    return f"{value:.2%}" if pd.notna(value) else "N/A"


def _extract_json(content):
    """Strip an optional markdown code fence and parse the JSON payload.

    Raises ``json.JSONDecodeError`` (or ``IndexError`` on a malformed
    fence) — callers treat any exception as a failed batch.
    """
    content = content.strip()
    if content.startswith("```"):
        content = content.split("```")[1]
        if content.startswith("json"):
            content = content[4:]
    return json.loads(content)


def score_batch(batch_stocks):
    """Score a batch of stocks using AI model.

    Builds a per-stock description, asks the model for scores, and
    returns the parsed list of score dicts.  Returns ``None`` on any
    API or parse failure; the caller decides whether to retry.
    """
    stocks_text = ""
    for s in batch_stocks:
        # Prefer Sheet4's sector/theme when present; fall back to the
        # master list's theme, then to 'N/A'.
        sector = s.get('sector', s.get('theme', 'N/A'))
        theme = s.get('theme_s4', s.get('theme', 'N/A'))
        stocks_text += f"""
---
Symbol: {s['symbol']}
Sector: {sector}
Theme: {theme}
Performance (Past 7 Days): {_fmt_pct(s.get('perf_7d'))}
Performance (Year to Date): {_fmt_pct(s.get('perf_ytd'))}
Performance (Specific Date): {_fmt_pct(s.get('perf_specific'))}
First Call KOL: {s.get('first_call_kol', 'N/A')}
Top Contributor: {s.get('top_contributor', 'N/A')}
"""

    prompt = f"""You are a professional stock/crypto analyst. Evaluate each of the following stocks/assets and provide a comprehensive score.

For EACH stock, provide:
1. **overall_score** (1-100): Overall investment attractiveness score
2. **momentum_score** (1-100): Based on recent price performance and momentum
3. **theme_score** (1-100): How strong/relevant is the thematic play (e.g., AI, Defense, Aerospace, Crypto, etc.)
4. **risk_score** (1-100): Risk level (100 = highest risk)
5. **social_buzz_score** (1-100): Social media attention and KOL backing strength
6. **brief_analysis**: 2-3 sentence analysis explaining the scores

Consider these factors:
- YTD performance indicates momentum strength
- Thematic relevance to current market trends (AI, Defense, Aerospace, Crypto, Quantum Computing are hot in 2025-2026)
- Small/micro cap stocks with strong themes get higher theme scores
- Stocks with notable KOL backing get higher social buzz scores
- Higher volatility = higher risk score

Here are the stocks to evaluate:
{stocks_text}

Return ONLY a valid JSON array with objects for each stock. Each object must have: symbol, overall_score, momentum_score, theme_score, risk_score, social_buzz_score, brief_analysis. Do not include any text outside the JSON array."""

    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "You are a professional financial analyst. Always respond with valid JSON only."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
            max_tokens=8000,
        )
        return _extract_json(response.choices[0].message.content)
    except Exception as e:
        # Deliberate best-effort: report and let the caller retry once.
        print(f"Error in batch scoring: {e}")
        return None


def _save_progress(scored):
    """Checkpoint the scored dict so an interrupted run can resume."""
    with open(PROGRESS_FILE, 'w') as f:
        json.dump(scored, f, indent=2, ensure_ascii=False)


def _merge_results(results, scored, announce):
    """Merge one batch's results into ``scored`` (keyed by symbol).

    When ``announce`` is true, print a per-symbol summary line (the
    original behavior for first-attempt successes only).
    """
    for r in results:
        sym = r.get('symbol', '')
        if not sym:
            continue
        scored[sym] = r
        if announce:
            print(f"  ✓ {sym}: Overall={r.get('overall_score')}, Momentum={r.get('momentum_score')}, Theme={r.get('theme_score')}")


def main():
    """Drive the full scoring run: load, batch, score, checkpoint, save."""
    stocks_info = load_stocks_info()
    print(f"Total unique stocks to score: {len(stocks_info)}")

    # Load progress if exists.
    scored = {}
    if os.path.exists(PROGRESS_FILE):
        with open(PROGRESS_FILE, 'r') as f:
            scored = json.load(f)
        print(f"Loaded {len(scored)} previously scored stocks")

    remaining = [s for s in stocks_info if s['symbol'] not in scored]
    print(f"Remaining to score: {len(remaining)}")
    total_batches = (len(remaining) + BATCH_SIZE - 1) // BATCH_SIZE

    for i in range(0, len(remaining), BATCH_SIZE):
        batch = remaining[i:i + BATCH_SIZE]
        batch_num = i // BATCH_SIZE + 1
        symbols_in_batch = [s['symbol'] for s in batch]
        print(f"\nBatch {batch_num}/{total_batches}: Scoring {symbols_in_batch}")

        results = score_batch(batch)
        if results:
            _merge_results(results, scored, announce=True)
            _save_progress(scored)  # checkpoint after each batch
        else:
            print(f"  ✗ Batch {batch_num} failed, will retry...")
            # Retry once after a short back-off.
            time.sleep(2)
            results = score_batch(batch)
            if results:
                _merge_results(results, scored, announce=False)
                _save_progress(scored)
        time.sleep(1)  # Rate limit

    # Save final results.
    with open(OUTPUT_JSON, 'w') as f:
        json.dump(scored, f, indent=2, ensure_ascii=False)
    print(f"\n=== SCORING COMPLETE ===")
    print(f"Total scored: {len(scored)}/{len(stocks_info)}")
    print(f"Results saved to: {OUTPUT_JSON}")


if __name__ == "__main__":
    main()