文件
memely-alpha-stock-ranking/build_multi_model_excel.py
hao 90d58a000b Add multi-model scoring: GPT-4.1-mini, GPT-4.1-nano, Gemini-2.5-flash
- MemelyAlphaStockRanking_multi_model.xlsx: 13-sheet Excel with per-model scores
  - Multi-Model Comparison (cross-model ranking)
  - Model Legend (color coding guide)
  - Scores - GPT-4.1-mini (blue highlights)
  - Scores - GPT-4.1-nano (green highlights)
  - Scores - Gemini-2.5-flash (purple highlights)
  - Rank sheets for each model
- scores_gpt41nano.json: GPT-4.1-nano raw scores
- scores_gemini25flash.json: Gemini-2.5-flash raw scores
- score_multi_model.py: Multi-model scoring script
- build_multi_model_excel.py: Excel generation script
2026-03-14 08:01:52 -04:00

534 行
19 KiB
Python

此文件含有模棱两可的 Unicode 字符
此文件含有可能会与其他字符混淆的 Unicode 字符。 如果您是想特意这样的,可以安全地忽略该警告。 使用 Escape 按钮显示他们。
#!/usr/bin/env python3
"""
Build a multi-model Excel file:
- Copy original sheets for each model
- Each model gets its own colored score columns
- Add a cross-model comparison summary sheet
"""
import json
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Font, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from copy import copy
EXCEL_SRC = "/home/ubuntu/upload/MemelyAlphaStockRanking副本副本.xlsx"
EXCEL_OUT = "/home/ubuntu/memely-alpha-stock-ranking/MemelyAlphaStockRanking_multi_model.xlsx"
# Model configs: name, json_path, highlight_color, header_color
MODELS = [
{
"name": "GPT-4.1-mini",
"short": "gpt41mini",
"json": "/home/ubuntu/memely-alpha-stock-ranking/research_scores.json",
"fill_color": "BDD7EE", # Light blue
"header_color": "2F75B5", # Dark blue
"header_font_color": "FFFFFF",
},
{
"name": "GPT-4.1-nano",
"short": "gpt41nano",
"json": "/home/ubuntu/memely-alpha-stock-ranking/scores_gpt41nano.json",
"fill_color": "C6EFCE", # Light green
"header_color": "548235", # Dark green
"header_font_color": "FFFFFF",
},
{
"name": "Gemini-2.5-flash",
"short": "gemini25flash",
"json": "/home/ubuntu/memely-alpha-stock-ranking/scores_gemini25flash.json",
"fill_color": "E2BFEE", # Light purple
"header_color": "7030A0", # Dark purple
"header_font_color": "FFFFFF",
},
]
# Load all scores
all_scores = {}
for m in MODELS:
with open(m["json"], 'r') as f:
all_scores[m["name"]] = json.load(f)
print(f"Loaded {len(all_scores[m['name']])} scores for {m['name']}")
# Load original workbook
wb = load_workbook(EXCEL_SRC)
thin_border = Border(
left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin')
)
# ============================================================
# For each model, create a copy of "stock master list" sheet
# with that model's scores appended in its color
# ============================================================
# Read original data for reference
df = pd.read_excel(EXCEL_SRC, sheet_name='stock master list')
df4 = pd.read_excel(EXCEL_SRC, sheet_name='Sheet4')
# Get original sheet as template
orig_ws = wb['stock master list']
orig_max_col = orig_ws.max_column
orig_max_row = orig_ws.max_row
for model_cfg in MODELS:
model_name = model_cfg["name"]
scores = all_scores[model_name]
fill_color = model_cfg["fill_color"]
header_color = model_cfg["header_color"]
header_font_color = model_cfg["header_font_color"]
cell_fill = PatternFill(start_color=fill_color, end_color=fill_color, fill_type="solid")
hdr_fill = PatternFill(start_color=header_color, end_color=header_color, fill_type="solid")
hdr_font = Font(bold=True, color=header_font_color, size=11)
score_font = Font(size=10)
sheet_name = f"Scores - {model_name}"
# Truncate sheet name if too long (max 31 chars)
if len(sheet_name) > 31:
sheet_name = sheet_name[:31]
ws = wb.copy_worksheet(orig_ws)
ws.title = sheet_name
# Add score columns
new_headers = [
f'{model_name} Overall',
f'{model_name} Momentum',
f'{model_name} Theme',
f'{model_name} Risk',
f'{model_name} Social Buzz',
f'{model_name} Analysis',
]
for j, hdr in enumerate(new_headers):
col = orig_max_col + 1 + j
cell = ws.cell(row=1, column=col, value=hdr)
cell.fill = hdr_fill
cell.font = hdr_font
cell.alignment = Alignment(horizontal='center', wrap_text=True)
cell.border = thin_border
# Write scores
for row in range(2, orig_max_row + 1):
symbol = ws.cell(row=row, column=1).value
if symbol and symbol in scores:
s = scores[symbol]
fields = ['overall_score', 'momentum_score', 'theme_score', 'risk_score', 'social_buzz_score', 'brief_analysis']
for j, field in enumerate(fields):
col = orig_max_col + 1 + j
cell = ws.cell(row=row, column=col, value=s.get(field, ''))
cell.fill = cell_fill
cell.font = score_font
if field != 'brief_analysis':
cell.alignment = Alignment(horizontal='center')
else:
cell.alignment = Alignment(wrap_text=True)
cell.border = thin_border
# Set column widths
widths = [14, 16, 14, 12, 16, 55]
for j, w in enumerate(widths):
col_letter = get_column_letter(orig_max_col + 1 + j)
ws.column_dimensions[col_letter].width = w
ws.freeze_panes = 'B2'
print(f"Created sheet: {sheet_name}")
# ============================================================
# Create cross-model comparison summary sheet
# ============================================================
ws_comp = wb.create_sheet("Multi-Model Comparison", 0)
# Headers
comp_headers = ['Rank', 'Symbol', 'Theme']
for m in MODELS:
comp_headers.append(f'{m["name"]} Overall')
comp_headers.extend(['Avg Overall', 'Std Dev', 'Consensus'])
for m in MODELS:
comp_headers.append(f'{m["name"]} Momentum')
for m in MODELS:
comp_headers.append(f'{m["name"]} Theme')
for m in MODELS:
comp_headers.append(f'{m["name"]} Risk')
for m in MODELS:
comp_headers.append(f'{m["name"]} Social Buzz')
# Write headers with styling
general_hdr_fill = PatternFill(start_color="333333", end_color="333333", fill_type="solid")
general_hdr_font = Font(bold=True, color="FFFFFF", size=10)
for col, hdr in enumerate(comp_headers, 1):
cell = ws_comp.cell(row=1, column=col, value=hdr)
# Color-code model-specific headers
matched = False
for m in MODELS:
if m["name"] in hdr:
cell.fill = PatternFill(start_color=m["header_color"], end_color=m["header_color"], fill_type="solid")
cell.font = Font(bold=True, color=m["header_font_color"], size=10)
matched = True
break
if not matched:
cell.fill = general_hdr_fill
cell.font = general_hdr_font
cell.alignment = Alignment(horizontal='center', wrap_text=True)
cell.border = thin_border
# Collect all symbols and compute averages
symbol_info = {}
for _, row in df.iterrows():
sym = row['Symbol']
if pd.notna(sym) and sym not in symbol_info:
symbol_info[sym] = str(row.get('Theme', ''))
import statistics
ranking_data = []
for sym in symbol_info:
overalls = []
momentums = []
themes = []
risks = []
buzzes = []
for m in MODELS:
s = all_scores[m["name"]].get(sym, {})
o = s.get('overall_score', None)
if o is not None:
overalls.append(o)
mo = s.get('momentum_score', None)
if mo is not None:
momentums.append(mo)
t = s.get('theme_score', None)
if t is not None:
themes.append(t)
r = s.get('risk_score', None)
if r is not None:
risks.append(r)
b = s.get('social_buzz_score', None)
if b is not None:
buzzes.append(b)
avg = statistics.mean(overalls) if overalls else 0
std = statistics.stdev(overalls) if len(overalls) > 1 else 0
ranking_data.append({
'symbol': sym,
'theme': symbol_info[sym],
'avg_overall': avg,
'std_overall': std,
'overalls': overalls,
'momentums': momentums,
'themes': themes,
'risks': risks,
'buzzes': buzzes,
})
# Sort by average overall score
ranking_data.sort(key=lambda x: x['avg_overall'], reverse=True)
def get_score_fill_general(score):
if score >= 70:
return PatternFill(start_color="92D050", end_color="92D050", fill_type="solid")
elif score >= 50:
return PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
elif score >= 30:
return PatternFill(start_color="FFC000", end_color="FFC000", fill_type="solid")
else:
return PatternFill(start_color="FF6B6B", end_color="FF6B6B", fill_type="solid")
def get_consensus(avg, std):
if std <= 5:
return "Strong Consensus"
elif std <= 10:
return "Moderate Consensus"
elif std <= 15:
return "Weak Consensus"
else:
return "Divergent"
for rank, rd in enumerate(ranking_data, 1):
row = rank + 1
sym = rd['symbol']
ws_comp.cell(row=row, column=1, value=rank).border = thin_border
ws_comp.cell(row=row, column=1).alignment = Alignment(horizontal='center')
cell = ws_comp.cell(row=row, column=2, value=sym)
cell.font = Font(bold=True)
cell.border = thin_border
theme_val = rd['theme'] if pd.notna(rd['theme']) and rd['theme'] != 'nan' else ''
ws_comp.cell(row=row, column=3, value=theme_val).border = thin_border
# Model overall scores with model-specific colors
col = 4
for i, m in enumerate(MODELS):
s = all_scores[m["name"]].get(sym, {})
val = s.get('overall_score', '')
cell = ws_comp.cell(row=row, column=col + i, value=val)
if isinstance(val, (int, float)):
cell.fill = PatternFill(start_color=m["fill_color"], end_color=m["fill_color"], fill_type="solid")
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
col += len(MODELS)
# Average
avg = rd['avg_overall']
cell = ws_comp.cell(row=row, column=col, value=round(avg, 1))
cell.fill = get_score_fill_general(avg)
cell.font = Font(bold=True, size=11)
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
col += 1
# Std Dev
std = rd['std_overall']
cell = ws_comp.cell(row=row, column=col, value=round(std, 1))
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
col += 1
# Consensus
consensus = get_consensus(avg, std)
cell = ws_comp.cell(row=row, column=col, value=consensus)
if "Strong" in consensus:
cell.fill = PatternFill(start_color="92D050", end_color="92D050", fill_type="solid")
elif "Moderate" in consensus:
cell.fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
elif "Weak" in consensus:
cell.fill = PatternFill(start_color="FFC000", end_color="FFC000", fill_type="solid")
else:
cell.fill = PatternFill(start_color="FF6B6B", end_color="FF6B6B", fill_type="solid")
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
col += 1
# Momentum scores per model
for i, m in enumerate(MODELS):
s = all_scores[m["name"]].get(sym, {})
val = s.get('momentum_score', '')
cell = ws_comp.cell(row=row, column=col + i, value=val)
if isinstance(val, (int, float)):
cell.fill = PatternFill(start_color=m["fill_color"], end_color=m["fill_color"], fill_type="solid")
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
col += len(MODELS)
# Theme scores per model
for i, m in enumerate(MODELS):
s = all_scores[m["name"]].get(sym, {})
val = s.get('theme_score', '')
cell = ws_comp.cell(row=row, column=col + i, value=val)
if isinstance(val, (int, float)):
cell.fill = PatternFill(start_color=m["fill_color"], end_color=m["fill_color"], fill_type="solid")
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
col += len(MODELS)
# Risk scores per model
for i, m in enumerate(MODELS):
s = all_scores[m["name"]].get(sym, {})
val = s.get('risk_score', '')
cell = ws_comp.cell(row=row, column=col + i, value=val)
if isinstance(val, (int, float)):
cell.fill = PatternFill(start_color=m["fill_color"], end_color=m["fill_color"], fill_type="solid")
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
col += len(MODELS)
# Social buzz scores per model
for i, m in enumerate(MODELS):
s = all_scores[m["name"]].get(sym, {})
val = s.get('social_buzz_score', '')
cell = ws_comp.cell(row=row, column=col + i, value=val)
if isinstance(val, (int, float)):
cell.fill = PatternFill(start_color=m["fill_color"], end_color=m["fill_color"], fill_type="solid")
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
# Set column widths for comparison sheet
ws_comp.column_dimensions['A'].width = 6
ws_comp.column_dimensions['B'].width = 10
ws_comp.column_dimensions['C'].width = 18
for col_idx in range(4, 4 + len(comp_headers) - 3):
col_letter = get_column_letter(col_idx)
ws_comp.column_dimensions[col_letter].width = 16
ws_comp.freeze_panes = 'D2'
print(f"Created 'Multi-Model Comparison' sheet")
# ============================================================
# Also create per-model ranking sheets (sorted by that model's overall score)
# ============================================================
for model_cfg in MODELS:
model_name = model_cfg["name"]
scores = all_scores[model_name]
fill_color = model_cfg["fill_color"]
header_color = model_cfg["header_color"]
header_font_color = model_cfg["header_font_color"]
cell_fill = PatternFill(start_color=fill_color, end_color=fill_color, fill_type="solid")
hdr_fill = PatternFill(start_color=header_color, end_color=header_color, fill_type="solid")
hdr_font = Font(bold=True, color=header_font_color, size=10)
sheet_name = f"Rank - {model_name}"
if len(sheet_name) > 31:
sheet_name = sheet_name[:31]
ws_rank = wb.create_sheet(sheet_name)
rank_headers = ['Rank', 'Symbol', 'Theme', 'Overall', 'Momentum', 'Theme Score', 'Risk', 'Social Buzz', 'YTD Perf', 'Analysis']
for col, hdr in enumerate(rank_headers, 1):
cell = ws_rank.cell(row=1, column=col, value=hdr)
cell.fill = hdr_fill
cell.font = hdr_font
cell.alignment = Alignment(horizontal='center', wrap_text=True)
cell.border = thin_border
# Sort by overall score
sorted_scores = sorted(scores.items(), key=lambda x: x[1].get('overall_score', 0), reverse=True)
for rank, (sym, s) in enumerate(sorted_scores, 1):
row = rank + 1
theme = symbol_info.get(sym, '')
if pd.isna(theme) or theme == 'nan':
theme = ''
ws_rank.cell(row=row, column=1, value=rank).border = thin_border
ws_rank.cell(row=row, column=1).alignment = Alignment(horizontal='center')
cell = ws_rank.cell(row=row, column=2, value=sym)
cell.font = Font(bold=True)
cell.border = thin_border
ws_rank.cell(row=row, column=3, value=theme).border = thin_border
# Overall
overall = s.get('overall_score', 0)
cell = ws_rank.cell(row=row, column=4, value=overall)
cell.fill = cell_fill
cell.font = Font(bold=True, size=11)
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
# Momentum
cell = ws_rank.cell(row=row, column=5, value=s.get('momentum_score', ''))
cell.fill = cell_fill
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
# Theme Score
cell = ws_rank.cell(row=row, column=6, value=s.get('theme_score', ''))
cell.fill = cell_fill
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
# Risk
cell = ws_rank.cell(row=row, column=7, value=s.get('risk_score', ''))
cell.fill = cell_fill
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
# Social Buzz
cell = ws_rank.cell(row=row, column=8, value=s.get('social_buzz_score', ''))
cell.fill = cell_fill
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
# YTD Perf from original data
ytd_val = ''
for _, orig_row in df.iterrows():
if orig_row['Symbol'] == sym:
ytd_val = orig_row.get('performance Year to Date', '')
break
if pd.notna(ytd_val) and ytd_val != '':
cell = ws_rank.cell(row=row, column=9, value=ytd_val)
cell.number_format = '0.00%'
else:
cell = ws_rank.cell(row=row, column=9, value='N/A')
cell.alignment = Alignment(horizontal='center')
cell.border = thin_border
# Analysis
cell = ws_rank.cell(row=row, column=10, value=s.get('brief_analysis', ''))
cell.fill = cell_fill
cell.alignment = Alignment(wrap_text=True)
cell.border = thin_border
# Column widths
rank_widths = [6, 10, 18, 10, 12, 12, 10, 14, 12, 55]
for i, w in enumerate(rank_widths):
ws_rank.column_dimensions[get_column_letter(i + 1)].width = w
ws_rank.freeze_panes = 'D2'
print(f"Created ranking sheet: {sheet_name}")
# ============================================================
# Add a legend/info sheet
# ============================================================
ws_legend = wb.create_sheet("Model Legend", 1)
ws_legend.cell(row=1, column=1, value="Model Color Legend").font = Font(bold=True, size=14)
ws_legend.merge_cells('A1:D1')
ws_legend.cell(row=3, column=1, value="Model Name").font = Font(bold=True, size=11)
ws_legend.cell(row=3, column=2, value="Provider").font = Font(bold=True, size=11)
ws_legend.cell(row=3, column=3, value="Color").font = Font(bold=True, size=11)
ws_legend.cell(row=3, column=4, value="Sheet Names").font = Font(bold=True, size=11)
for i, m in enumerate(MODELS):
row = 4 + i
ws_legend.cell(row=row, column=1, value=m["name"]).font = Font(bold=True)
provider = "OpenAI" if "GPT" in m["name"] or "gpt" in m["name"] else "Google"
ws_legend.cell(row=row, column=2, value=provider)
cell = ws_legend.cell(row=row, column=3, value=f"Sample Color")
cell.fill = PatternFill(start_color=m["fill_color"], end_color=m["fill_color"], fill_type="solid")
sheet_name_scores = f"Scores - {m['name']}"[:31]
sheet_name_rank = f"Rank - {m['name']}"[:31]
ws_legend.cell(row=row, column=4, value=f"{sheet_name_scores}, {sheet_name_rank}")
ws_legend.cell(row=8, column=1, value="Sheet Overview").font = Font(bold=True, size=12)
sheets_info = [
("Multi-Model Comparison", "Cross-model comparison with all scores side by side, sorted by average overall score"),
("Model Legend", "This sheet - explains color coding and sheet structure"),
]
for m in MODELS:
sheets_info.append((f"Scores - {m['name']}"[:31], f"Original stock master list + {m['name']} scores in {m['name']} color"))
sheets_info.append((f"Rank - {m['name']}"[:31], f"Stocks ranked by {m['name']} overall score"))
for i, (sn, desc) in enumerate(sheets_info):
row = 9 + i
ws_legend.cell(row=row, column=1, value=sn).font = Font(bold=True)
ws_legend.cell(row=row, column=2, value=desc)
ws_legend.merge_cells(start_row=row, start_column=2, end_row=row, end_column=4)
ws_legend.column_dimensions['A'].width = 30
ws_legend.column_dimensions['B'].width = 20
ws_legend.column_dimensions['C'].width = 15
ws_legend.column_dimensions['D'].width = 60
print(f"Created 'Model Legend' sheet")
# Save
wb.save(EXCEL_OUT)
print(f"\n=== DONE ===")
print(f"Saved multi-model Excel to: {EXCEL_OUT}")
print(f"Total sheets: {len(wb.sheetnames)}")
for sn in wb.sheetnames:
print(f" - {sn}")