import streamlit as st
import requests
import json
import networkx as nx
from urllib.parse import urlparse, urljoin
import time
from datetime import datetime
import anthropic
from typing import List, Dict, Set, Tuple
import re
from collections import defaultdict
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import logging
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from io import StringIO

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Page config
st.set_page_config(
    page_title="WordPress SEO Query Analyzer",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS
st.markdown("""
<style>
    .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
        font-size: 16px;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 20px;
        border-radius: 10px;
        text-align: center;
    }
    .recommendation-card {
        background-color: #e8f4f8;
        padding: 15px;
        border-radius: 8px;
        margin-bottom: 10px;
        border-left: 4px solid #1f77b4;
    }
    .high-priority {
        border-left-color: #ff4444;
    }
    .medium-priority {
        border-left-color: #ffaa44;
    }
    .low-priority {
        border-left-color: #44ff44;
    }
</style>
""", unsafe_allow_html=True)
class WordPressQueryFanOutAnalyzer:
    """Analyze WordPress sites for Google AI Mode query fan-out optimization"""

    def __init__(self, site_url: str, claude_api_key: str):
        self.site_url = site_url.rstrip('/')
        self.api_base = f"{self.site_url}/wp-json/wp/v2"
        self.claude = anthropic.Anthropic(api_key=claude_api_key)
        self.content_graph = nx.DiGraph()
        self.query_patterns = defaultdict(list)
        self.content_cache = {}
        self.tfidf_vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
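
    # Assumption: the target site exposes the public, unauthenticated WordPress REST API
    # at /wp-json/wp/v2. Sites that disable the API or require authentication (e.g. via
    # application passwords) will simply return empty results from the fetch_* methods
    # below; adding an auth header to those requests calls is a site-specific change.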

    def fetch_all_content(self, progress_callback=None) -> Dict:
        """Fetch all content from WordPress site"""
        content = {
            'posts': [],
            'pages': [],
            'categories': [],
            'tags': [],
            'media': []
        }

        # Fetch posts
        if progress_callback:
            progress_callback(0.1, "Fetching posts...")
        content['posts'] = self.fetch_posts()

        # Fetch pages
        if progress_callback:
            progress_callback(0.3, "Fetching pages...")
        content['pages'] = self.fetch_pages()

        # Fetch categories
        if progress_callback:
            progress_callback(0.5, "Fetching categories...")
        content['categories'] = self.fetch_categories()

        # Fetch tags
        if progress_callback:
            progress_callback(0.7, "Fetching tags...")
        content['tags'] = self.fetch_tags()

        # Fetch media
        if progress_callback:
            progress_callback(0.9, "Fetching media info...")
        content['media'] = self.fetch_media_info()

        if progress_callback:
            progress_callback(1.0, "Content fetching complete!")

        return content

    def fetch_posts(self, per_page=100) -> List[Dict]:
        """Fetch all posts from WordPress"""
        posts = []
        page = 1
        while True:
            try:
                response = requests.get(
                    f"{self.api_base}/posts",
                    params={'per_page': per_page, 'page': page, '_embed': True},
                    timeout=30
                )
                if response.status_code == 200:
                    batch = response.json()
                    if not batch:
                        break
                    posts.extend(batch)
                    page += 1
                    time.sleep(0.5)  # Rate limiting
                else:
                    break
            except Exception as e:
                st.error(f"Error fetching posts: {e}")
                break
        return posts
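
    # Pagination note: the WordPress REST API reports the total number of pages in the
    # X-WP-TotalPages response header, and requesting a page past the end typically
    # returns HTTP 400 (rest_post_invalid_page_number), which is why the non-200 branch
    # above ends the loop. Reading the header up front would avoid one extra request.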

    def fetch_pages(self, per_page=100) -> List[Dict]:
        """Fetch all pages from WordPress"""
        pages = []
        page = 1
        while True:
            try:
                response = requests.get(
                    f"{self.api_base}/pages",
                    params={'per_page': per_page, 'page': page, '_embed': True},
                    timeout=30
                )
                if response.status_code == 200:
                    batch = response.json()
                    if not batch:
                        break
                    pages.extend(batch)
                    page += 1
                    time.sleep(0.5)
                else:
                    break
            except Exception as e:
                st.error(f"Error fetching pages: {e}")
                break
        return pages

    def fetch_categories(self) -> List[Dict]:
        """Fetch all categories"""
        # Only the first page of terms is fetched (per_page=100); very large taxonomies
        # would need the same pagination loop as fetch_posts.
        try:
            response = requests.get(f"{self.api_base}/categories", params={'per_page': 100}, timeout=30)
            return response.json() if response.status_code == 200 else []
        except Exception:
            return []

    def fetch_tags(self) -> List[Dict]:
        """Fetch all tags"""
        try:
            response = requests.get(f"{self.api_base}/tags", params={'per_page': 100}, timeout=30)
            return response.json() if response.status_code == 200 else []
        except Exception:
            return []

    def fetch_media_info(self) -> List[Dict]:
        """Fetch media information"""
        try:
            response = requests.get(f"{self.api_base}/media", params={'per_page': 50}, timeout=30)
            return response.json() if response.status_code == 200 else []
        except Exception:
            return []

    def build_content_graph(self, content: Dict) -> nx.DiGraph:
        """Build a graph representation of the site's content"""
        # Add posts as nodes. The raw HTML is kept alongside the cleaned text because
        # link extraction and structure checks need the markup that clean_html strips.
        for post in content['posts']:
            self.content_graph.add_node(
                post['id'],
                type='post',
                title=post['title']['rendered'],
                url=post['link'],
                content=self.clean_html(post['content']['rendered']),
                raw_content=post['content']['rendered'],
                excerpt=self.clean_html(post['excerpt']['rendered']),
                categories=post.get('categories', []),
                tags=post.get('tags', []),
                date=post['date']
            )

        # Add pages as nodes
        for page in content['pages']:
            self.content_graph.add_node(
                f"page_{page['id']}",
                type='page',
                title=page['title']['rendered'],
                url=page['link'],
                content=self.clean_html(page['content']['rendered']),
                raw_content=page['content']['rendered'],
                parent=page.get('parent', 0),
                date=page['date']
            )

        # Build edges based on internal links
        self.build_internal_link_edges()

        # Build edges based on category/tag relationships
        self.build_taxonomy_edges(content)

        return self.content_graph

    def clean_html(self, html: str) -> str:
        """Remove HTML tags and clean text"""
        text = re.sub('<.*?>', '', html)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()
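
    # clean_html is a rough regex tag stripper and can misbehave on edge cases such as
    # attributes containing ">" or inline scripts. If adding the bs4 dependency is
    # acceptable, a sturdier alternative would be (sketch):
    #   from bs4 import BeautifulSoup
    #   text = BeautifulSoup(html, "html.parser").get_text(" ", strip=True)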

    def build_internal_link_edges(self):
        """Extract and build edges from internal links"""
        for node_id, data in self.content_graph.nodes(data=True):
            # Scan the raw HTML: the cleaned text has href attributes stripped out.
            html = data.get('raw_content') or data.get('content', '')
            if html:
                # Extract internal links
                links = re.findall(rf'{self.site_url}/[^"\'>\s]+', html)
                for link in links:
                    # Find the target node
                    for target_id, target_data in self.content_graph.nodes(data=True):
                        if target_data.get('url') == link:
                            self.content_graph.add_edge(node_id, target_id, type='internal_link')
                            break
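
    # Possible optimisation (sketch, not wired in): build a {url: node_id} index once,
    #   url_index = {d['url']: n for n, d in self.content_graph.nodes(data=True) if 'url' in d}
    # and replace the inner scan with url_index.get(link), turning the lookup into O(1)
    # instead of rescanning every node for every extracted link.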

    def build_taxonomy_edges(self, content: Dict):
        """Build edges based on categories and tags"""
        # Create category nodes
        for cat in content['categories']:
            self.content_graph.add_node(
                f"cat_{cat['id']}",
                type='category',
                name=cat['name'],
                slug=cat['slug']
            )

        # Create tag nodes
        for tag in content['tags']:
            self.content_graph.add_node(
                f"tag_{tag['id']}",
                type='tag',
                name=tag['name'],
                slug=tag['slug']
            )

        # Connect posts to categories and tags. Iterate over a snapshot of the nodes:
        # add_edge can create a missing term node, which would otherwise mutate the
        # node dict while it is being iterated.
        for node_id, data in list(self.content_graph.nodes(data=True)):
            if data['type'] == 'post':
                for cat_id in data.get('categories', []):
                    self.content_graph.add_edge(node_id, f"cat_{cat_id}", type='categorized_as')
                for tag_id in data.get('tags', []):
                    self.content_graph.add_edge(node_id, f"tag_{tag_id}", type='tagged_as')

    def analyze_query_patterns(self) -> Dict:
        """Analyze content for complex query patterns using Claude"""
        patterns = {
            'complex_queries': [],
            'decompositions': {},
            'coverage_analysis': {},
            'opportunities': []
        }

        # Sample content for analysis
        sample_content = self.get_content_sample()

        # Analyze with Claude
        prompt = f"""Analyze this WordPress site content for Google AI Mode query optimization opportunities.

Site URL: {self.site_url}

Content Sample:
{json.dumps(sample_content, indent=2)[:3000]}

Identify:
1. Complex queries users might ask that would trigger Google's query fan-out
2. How Google would decompose these queries into sub-queries
3. Which content currently answers which sub-queries
4. Gaps where sub-queries aren't answered
5. Multi-source optimization opportunities

Focus on queries that would require multiple hops of reasoning to answer fully.

Provide analysis in JSON format with:
- complex_queries: List of potential complex user queries
- decompositions: How each query would be broken down
- current_coverage: Which content addresses which sub-queries
- gaps: Missing sub-query content
- recommendations: Specific content to create"""

        try:
            response = self.claude.messages.create(
                model="claude-3-opus-20240229",
                max_tokens=4000,
                messages=[{"role": "user", "content": prompt}]
            )

            # Parse Claude's response
            analysis = self.parse_claude_response(response.content[0].text)
            patterns.update(analysis)
        except Exception as e:
            st.error(f"Error analyzing with Claude: {e}")

        return patterns
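
    # Assumption: the pinned model id "claude-3-opus-20240229" was current when this was
    # written; Anthropic retires model versions over time, so the string may need to be
    # updated (or made configurable) for the call to keep working. The content sample is
    # truncated to 3,000 characters above to keep the prompt small.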

    def get_content_sample(self) -> List[Dict]:
        """Get a representative sample of content"""
        sample = []
        for node_id, data in list(self.content_graph.nodes(data=True))[:20]:
            if data['type'] in ['post', 'page']:
                sample.append({
                    'title': data['title'],
                    'type': data['type'],
                    'excerpt': data.get('excerpt', '')[:200],
                    'url': data['url']
                })
        return sample

    def parse_claude_response(self, response_text: str) -> Dict:
        """Parse Claude's response into structured data"""
        try:
            # Try to extract JSON from response
            json_match = re.search(r'\{[\s\S]*\}', response_text)
            if json_match:
                return json.loads(json_match.group())
            else:
                # Fallback parsing
                return self.fallback_parse(response_text)
        except Exception:
            return self.fallback_parse(response_text)

    def fallback_parse(self, text: str) -> Dict:
        """Fallback parsing if JSON extraction fails"""
        return {
            'complex_queries': re.findall(r'"([^"]+\?)"', text),
            'recommendations': [text],
            'gaps': []
        }

    def analyze_content_depth(self) -> Dict:
        """Analyze content depth and multi-hop potential"""
        depth_analysis = {
            'content_scores': {},
            'hub_potential': [],
            'orphan_content': [],
            'semantic_clusters': []
        }

        # Calculate content depth scores
        for node_id, data in self.content_graph.nodes(data=True):
            if data['type'] in ['post', 'page']:
                score = self.calculate_content_depth(data)
                depth_analysis['content_scores'][node_id] = {
                    'title': data['title'],
                    'url': data['url'],
                    'depth_score': score,
                    'word_count': len(data.get('content', '').split()),
                    'internal_links': self.content_graph.out_degree(node_id),
                    'backlinks': self.content_graph.in_degree(node_id)
                }

        # Identify hub potential
        for node_id, score_data in depth_analysis['content_scores'].items():
            if score_data['internal_links'] > 5 and score_data['depth_score'] > 0.7:
                depth_analysis['hub_potential'].append(score_data)

        # Find orphan content
        for node_id, score_data in depth_analysis['content_scores'].items():
            if score_data['backlinks'] == 0 and score_data['internal_links'] < 2:
                depth_analysis['orphan_content'].append(score_data)

        # Identify semantic clusters
        depth_analysis['semantic_clusters'] = self.identify_semantic_clusters()

        return depth_analysis

    def calculate_content_depth(self, node_data: Dict) -> float:
        """Calculate a depth score for content"""
        score = 0.0

        # Word count factor (on the cleaned text)
        word_count = len(node_data.get('content', '').split())
        if word_count > 2000:
            score += 0.3
        elif word_count > 1000:
            score += 0.2
        elif word_count > 500:
            score += 0.1

        # Heading structure (simplified) -- checked against the raw HTML, since the
        # cleaned text no longer contains any tags
        content = node_data.get('raw_content') or node_data.get('content', '')
        h2_count = content.count('<h2') + content.count('## ')
        h3_count = content.count('<h3') + content.count('### ')
        if h2_count > 3:
            score += 0.2
        if h3_count > 5:
            score += 0.1

        # Media presence
        if '<img' in content or '[gallery' in content:
            score += 0.1

        # Lists and structured data
        if '<ul' in content or '<ol' in content or '- ' in content:
            score += 0.1

        # Schema markup indicators
        if 'itemtype' in content or '@type' in content:
            score += 0.2

        return min(score, 1.0)
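
    # Scoring example (illustrative): a post of ~1,200 words (+0.2) with four <h2>
    # headings (+0.2) and at least one image (+0.1) scores 0.5; the total is capped at 1.0.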

    def identify_semantic_clusters(self) -> List[Dict]:
        """Identify semantic content clusters using TF-IDF"""
        # Prepare content for vectorization
        content_texts = []
        node_ids = []
        for node_id, data in self.content_graph.nodes(data=True):
            if data['type'] in ['post', 'page'] and data.get('content'):
                content_texts.append(data['content'])
                node_ids.append(node_id)

        if not content_texts:
            return []

        # Vectorize content
        try:
            tfidf_matrix = self.tfidf_vectorizer.fit_transform(content_texts)

            # Calculate similarity matrix
            similarity_matrix = cosine_similarity(tfidf_matrix)

            # Identify clusters (simplified clustering)
            clusters = []
            visited = set()

            for i in range(len(node_ids)):
                if node_ids[i] in visited:
                    continue

                cluster = {
                    'center': node_ids[i],
                    'members': [],
                    'theme': self.extract_cluster_theme(i, tfidf_matrix)
                }

                for j in range(len(node_ids)):
                    if similarity_matrix[i][j] > 0.3:  # Similarity threshold
                        cluster['members'].append({
                            'id': node_ids[j],
                            'similarity': float(similarity_matrix[i][j])
                        })
                        visited.add(node_ids[j])

                if len(cluster['members']) > 1:
                    clusters.append(cluster)

            return clusters
        except Exception as e:
            st.error(f"Error in semantic clustering: {e}")
            return []
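
    # The 0.3 cosine-similarity cutoff is a heuristic, and the dense pairwise matrix
    # grows quadratically with the number of documents. For very large sites a proper
    # clustering algorithm could be swapped in instead, e.g. (sketch):
    #   from sklearn.cluster import KMeans
    #   labels = KMeans(n_clusters=10).fit_predict(tfidf_matrix)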

    def extract_cluster_theme(self, doc_index: int, tfidf_matrix) -> List[str]:
        """Extract theme keywords for a cluster"""
        feature_names = self.tfidf_vectorizer.get_feature_names_out()
        doc_tfidf = tfidf_matrix[doc_index].toarray()[0]

        # Get top 5 terms
        top_indices = doc_tfidf.argsort()[-5:][::-1]
        return [feature_names[i] for i in top_indices if doc_tfidf[i] > 0]

    def generate_optimization_report(self, progress_callback=None) -> Dict:
        """Generate comprehensive optimization report"""
        # Fetch and analyze content
        if progress_callback:
            progress_callback(0.2, "Fetching content...")
        content = self.fetch_all_content()

        if progress_callback:
            progress_callback(0.4, "Building content graph...")
        self.build_content_graph(content)

        # Run analyses
        if progress_callback:
            progress_callback(0.6, "Analyzing query patterns...")
        query_patterns = self.analyze_query_patterns()

        if progress_callback:
            progress_callback(0.8, "Analyzing content depth...")
        depth_analysis = self.analyze_content_depth()

        # Generate recommendations
        if progress_callback:
            progress_callback(0.9, "Generating recommendations...")
        recommendations = self.generate_recommendations(query_patterns, depth_analysis)

        # Compile report
        report = {
            'site_url': self.site_url,
            'analysis_date': datetime.now().isoformat(),
            'summary': {
                'total_posts': len(content['posts']),
                'total_pages': len(content['pages']),
                'content_nodes': self.content_graph.number_of_nodes(),
                'internal_links': self.content_graph.number_of_edges(),
                'orphan_content': len(depth_analysis['orphan_content']),
                'hub_pages': len(depth_analysis['hub_potential']),
                'semantic_clusters': len(depth_analysis['semantic_clusters'])
            },
            'query_optimization': query_patterns,
            'content_depth': depth_analysis,
            'recommendations': recommendations,
            'action_plan': self.create_action_plan(recommendations)
        }

        if progress_callback:
            progress_callback(1.0, "Analysis complete!")

        return report

    def generate_recommendations(self, query_patterns: Dict, depth_analysis: Dict) -> List[Dict]:
        """Generate specific optimization recommendations"""
        recommendations = []

        # Query coverage recommendations
        if 'gaps' in query_patterns:
            for gap in query_patterns.get('gaps', []):
                recommendations.append({
                    'type': 'content_gap',
                    'priority': 'high',
                    'action': 'Create new content',
                    'details': f"Create content to answer sub-query: {gap}",
                    'impact': 'Enables multi-hop reasoning path'
                })

        # Orphan content recommendations
        for orphan in depth_analysis['orphan_content'][:5]:  # Top 5
            recommendations.append({
                'type': 'orphan_content',
                'priority': 'medium',
                'action': 'Add internal links',
                'details': f"Connect orphan content: {orphan['title']}",
                'url': orphan['url'],
                'impact': 'Improves content graph connectivity'
            })

        # Hub optimization
        for hub in depth_analysis['hub_potential'][:3]:  # Top 3
            recommendations.append({
                'type': 'hub_optimization',
                'priority': 'high',
                'action': 'Enhance hub page',
                'details': f"Optimize hub potential: {hub['title']}",
                'url': hub['url'],
                'impact': 'Strengthens multi-source selection'
            })

        # Semantic cluster recommendations
        for cluster in depth_analysis['semantic_clusters'][:3]:  # Top 3
            recommendations.append({
                'type': 'semantic_bridge',
                'priority': 'medium',
                'action': 'Create semantic bridges',
                'details': f"Link related content in cluster: {', '.join(cluster['theme'])}",
                'impact': 'Enables query fan-out paths'
            })

        return recommendations

    def create_action_plan(self, recommendations: List[Dict]) -> Dict:
        """Create prioritized action plan"""
        action_plan = {
            'immediate': [],
            'short_term': [],
            'long_term': []
        }

        for rec in recommendations:
            if rec['priority'] == 'high':
                action_plan['immediate'].append({
                    'action': rec['action'],
                    'details': rec['details'],
                    'expected_impact': rec['impact']
                })
            elif rec['priority'] == 'medium':
                action_plan['short_term'].append({
                    'action': rec['action'],
                    'details': rec['details'],
                    'expected_impact': rec['impact']
                })
            else:
                action_plan['long_term'].append({
                    'action': rec['action'],
                    'details': rec['details'],
                    'expected_impact': rec['impact']
                })

        return action_plan
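
    # Note: generate_recommendations currently only assigns 'high' and 'medium' priorities,
    # so the 'long_term' bucket stays empty until low-priority recommendation types are added.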

# Streamlit UI
def main():
    st.title("🔍 WordPress SEO Query Fan-Out Analyzer")
    st.markdown("Optimize your WordPress site for Google's AI Mode multi-hop reasoning")

    # Sidebar
    with st.sidebar:
        st.header("⚙️ Configuration")

        site_url = st.text_input(
            "WordPress Site URL",
            placeholder="https://example.com",
            help="Enter the URL of your WordPress site"
        )

        claude_api_key = st.text_input(
            "Claude API Key",
            type="password",
            placeholder="sk-ant-...",
            help="Your Anthropic Claude API key"
        )

        st.markdown("---")

        analyze_button = st.button("🚀 Start Analysis", type="primary", use_container_width=True)

        st.markdown("---")

        st.markdown("""
        ### ℹ️ About This Tool

        This analyzer helps optimize your WordPress site for Google's AI-powered search features by:
        - 🕸️ Mapping content relationships
        - 🔍 Identifying query patterns
        - 📊 Analyzing content depth
        - 🎯 Finding optimization opportunities
        - 📋 Generating actionable recommendations
        """)

    # Main content area
    if analyze_button:
        if not site_url or not claude_api_key:
            st.error("Please provide both WordPress site URL and Claude API key")
            return

        # Validate URL
        try:
            result = urlparse(site_url)
            if not all([result.scheme, result.netloc]):
                st.error("Please enter a valid URL (e.g., https://example.com)")
                return
        except ValueError:
            st.error("Invalid URL format")
            return

        # Initialize analyzer
        with st.spinner("Initializing analyzer..."):
            try:
                analyzer = WordPressQueryFanOutAnalyzer(site_url, claude_api_key)
            except Exception as e:
                st.error(f"Failed to initialize analyzer: {e}")
                return

        # Progress tracking
        progress_bar = st.progress(0)
        status_text = st.empty()

        def update_progress(progress, status):
            progress_bar.progress(progress)
            status_text.text(status)

        # Run analysis
        try:
            report = analyzer.generate_optimization_report(progress_callback=update_progress)

            # Clear progress indicators
            progress_bar.empty()
            status_text.empty()

            # Display results in tabs
            tab1, tab2, tab3, tab4, tab5 = st.tabs([
                "📊 Overview",
                "🔍 Query Analysis",
                "📈 Content Depth",
                "💡 Recommendations",
                "📥 Export"
            ])

            with tab1:
                st.header("Site Overview")

                # Metrics
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    st.metric("Total Posts", report['summary']['total_posts'])
                with col2:
                    st.metric("Total Pages", report['summary']['total_pages'])
                with col3:
                    st.metric("Content Nodes", report['summary']['content_nodes'])
                with col4:
                    st.metric("Internal Links", report['summary']['internal_links'])

                col5, col6, col7, col8 = st.columns(4)
                with col5:
                    st.metric("Orphan Content", report['summary']['orphan_content'],
                              delta="Need attention", delta_color="inverse")
                with col6:
                    st.metric("Hub Pages", report['summary']['hub_pages'])
                with col7:
                    st.metric("Semantic Clusters", report['summary']['semantic_clusters'])
                with col8:
                    st.metric("Total Recommendations", len(report['recommendations']))

                # Content graph visualization
                st.subheader("Content Network Graph")

                # Create a simple network visualization
                if analyzer.content_graph.number_of_nodes() > 0:
                    # Create edge trace
                    edge_x = []
                    edge_y = []

                    # Use spring layout for positioning
                    pos = nx.spring_layout(analyzer.content_graph, k=1, iterations=50)
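
                    # Note: nx.spring_layout is stochastic, so the layout changes on every
                    # rerun; passing a fixed seed (e.g. seed=42) would make the rendered
                    # graph reproducible across analyses.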

                    for edge in analyzer.content_graph.edges():
                        x0, y0 = pos[edge[0]]
                        x1, y1 = pos[edge[1]]
                        edge_x.extend([x0, x1, None])
                        edge_y.extend([y0, y1, None])

                    edge_trace = go.Scatter(
                        x=edge_x, y=edge_y,
                        line=dict(width=0.5, color='#888'),
                        hoverinfo='none',
                        mode='lines'
                    )

                    # Create node trace
                    node_x = []
                    node_y = []
                    node_text = []
                    node_colors = []

                    color_map = {
                        'post': '#1f77b4',
                        'page': '#ff7f0e',
                        'category': '#2ca02c',
                        'tag': '#d62728'
                    }

                    for node in analyzer.content_graph.nodes():
                        x, y = pos[node]
                        node_x.append(x)
                        node_y.append(y)
                        node_data = analyzer.content_graph.nodes[node]
                        node_text.append(node_data.get('title', node_data.get('name', str(node)))[:30])
                        node_colors.append(color_map.get(node_data.get('type', 'post'), '#999'))

                    node_trace = go.Scatter(
                        x=node_x, y=node_y,
                        mode='markers+text',
                        hoverinfo='text',
                        text=node_text,
                        textposition="top center",
                        marker=dict(
                            showscale=False,
                            colorscale='YlGnBu',
                            size=10,
                            color=node_colors,
                            line_width=2
                        )
                    )

                    fig = go.Figure(data=[edge_trace, node_trace],
                                    layout=go.Layout(
                                        showlegend=False,
                                        hovermode='closest',
                                        margin=dict(b=0, l=0, r=0, t=0),
                                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                                        height=600
                                    ))

                    st.plotly_chart(fig, use_container_width=True)

            with tab2:
                st.header("Query Pattern Analysis")

                query_data = report.get('query_optimization', {})

                if query_data.get('complex_queries'):
                    st.subheader("🎯 Potential Complex Queries")
                    for i, query in enumerate(query_data['complex_queries'][:5], 1):
                        st.write(f"{i}. {query}")

                if query_data.get('gaps'):
                    st.subheader("⚠️ Content Gaps")
                    for gap in query_data.get('gaps', []):
                        st.warning(f"Missing content for: {gap}")

                if query_data.get('recommendations'):
                    st.subheader("📝 Claude's Analysis")
                    for rec in query_data.get('recommendations', []):
                        st.info(rec)

            with tab3:
                st.header("Content Depth Analysis")

                depth_data = report.get('content_depth', {})

                # Hub pages
                if depth_data.get('hub_potential'):
                    st.subheader("🌟 High-Potential Hub Pages")
                    hub_df = pd.DataFrame(depth_data['hub_potential'])
                    if not hub_df.empty:
                        hub_df = hub_df.sort_values('depth_score', ascending=False)
                        st.dataframe(
                            hub_df[['title', 'depth_score', 'internal_links', 'backlinks', 'word_count']],
                            use_container_width=True
                        )

                # Orphan content
                if depth_data.get('orphan_content'):
                    st.subheader("🔗 Orphan Content (Needs Linking)")
                    orphan_df = pd.DataFrame(depth_data['orphan_content'][:10])
                    if not orphan_df.empty:
                        st.dataframe(
                            orphan_df[['title', 'word_count', 'url']],
                            use_container_width=True
                        )

                # Semantic clusters
                if depth_data.get('semantic_clusters'):
                    st.subheader("🧩 Semantic Content Clusters")
                    for i, cluster in enumerate(depth_data['semantic_clusters'][:5], 1):
                        with st.expander(f"Cluster {i}: {', '.join(cluster.get('theme', []))}"):
                            st.write(f"**Theme Keywords:** {', '.join(cluster.get('theme', []))}")
                            st.write(f"**Number of related pages:** {len(cluster.get('members', []))}")

            with tab4:
                st.header("Optimization Recommendations")

                # Action plan
                action_plan = report.get('action_plan', {})

                col1, col2, col3 = st.columns(3)

                with col1:
                    st.subheader("🚨 Immediate Actions")
                    for action in action_plan.get('immediate', []):
                        st.markdown(f"""
                        <div class="recommendation-card high-priority">
                        <strong>{action['action']}</strong><br>
                        {action['details']}<br>
                        <em>Impact: {action['expected_impact']}</em>
                        </div>
                        """, unsafe_allow_html=True)

                with col2:
                    st.subheader("📅 Short-term Actions")
                    for action in action_plan.get('short_term', []):
                        st.markdown(f"""
                        <div class="recommendation-card medium-priority">
                        <strong>{action['action']}</strong><br>
                        {action['details']}<br>
                        <em>Impact: {action['expected_impact']}</em>
                        </div>
                        """, unsafe_allow_html=True)

                with col3:
                    st.subheader("📆 Long-term Actions")
                    for action in action_plan.get('long_term', []):
                        st.markdown(f"""
                        <div class="recommendation-card low-priority">
                        <strong>{action['action']}</strong><br>
                        {action['details']}<br>
                        <em>Impact: {action['expected_impact']}</em>
                        </div>
                        """, unsafe_allow_html=True)

                # Detailed recommendations
                st.subheader("📋 All Recommendations")
                if report.get('recommendations'):
                    rec_df = pd.DataFrame(report['recommendations'])
                    st.dataframe(rec_df, use_container_width=True)

            with tab5:
                st.header("Export Report")

                # JSON export
                json_str = json.dumps(report, indent=2)
                st.download_button(
                    label="📥 Download Full Report (JSON)",
                    data=json_str,
                    file_name=f"seo_report_{site_url.replace('https://', '').replace('/', '_')}.json",
                    mime="application/json"
                )

                # CSV export of recommendations
                if report.get('recommendations'):
                    rec_df = pd.DataFrame(report['recommendations'])
                    csv = rec_df.to_csv(index=False)
                    st.download_button(
                        label="📥 Download Recommendations (CSV)",
                        data=csv,
                        file_name=f"recommendations_{site_url.replace('https://', '').replace('/', '_')}.csv",
                        mime="text/csv"
                    )

                # Summary report
                summary = f"""
# SEO Analysis Report

**Site:** {report['site_url']}
**Analysis Date:** {report['analysis_date']}

## Summary
- Total Posts: {report['summary']['total_posts']}
- Total Pages: {report['summary']['total_pages']}
- Content Nodes: {report['summary']['content_nodes']}
- Internal Links: {report['summary']['internal_links']}
- Orphan Content: {report['summary']['orphan_content']}
- Hub Pages: {report['summary']['hub_pages']}
- Semantic Clusters: {report['summary']['semantic_clusters']}

## Top Recommendations
{chr(10).join([f"- {rec['action']}: {rec['details']}" for rec in report['recommendations'][:5]])}
"""

                st.download_button(
                    label="📥 Download Summary (Markdown)",
                    data=summary,
                    file_name=f"summary_{site_url.replace('https://', '').replace('/', '_')}.md",
                    mime="text/markdown"
                )

        except Exception as e:
            st.error(f"Analysis failed: {str(e)}")
            st.exception(e)

    else:
        # Welcome screen
        st.markdown("""
        ## Welcome to the WordPress SEO Query Fan-Out Analyzer! 🚀

        This tool helps you optimize your WordPress site for Google's AI-powered search features
        by analyzing your content structure and identifying opportunities for multi-hop reasoning paths.

        ### 🎯 What This Tool Does:

        1. **Content Mapping** - Builds a comprehensive graph of your site's content relationships
        2. **Query Analysis** - Uses Claude AI to identify complex queries your content could answer
        3. **Depth Analysis** - Evaluates content quality and identifies hub pages
        4. **Gap Detection** - Finds missing content that prevents complete query answers
        5. **Recommendations** - Provides actionable steps to improve your SEO

        ### 🚀 Getting Started:

        1. Enter your WordPress site URL in the sidebar
        2. Add your Claude API key (get one at [anthropic.com](https://www.anthropic.com))
        3. Click "Start Analysis" and wait for the results

        The analysis typically takes 2-5 minutes depending on your site size.
        """)

if __name__ == "__main__":
    main()