flowchart TB
%% ==========================================================================
%% STYLING
%% ==========================================================================
classDef entry fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
classDef userAction fill:#c8e6c9,stroke:#2e7d32,stroke-width:2px
classDef gate fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef queue fill:#fff3e0,stroke:#ef6c00,stroke-width:2px
classDef free fill:#e8f5e9,stroke:#2e7d32,stroke-width:1px
classDef cheap fill:#fffde7,stroke:#f9a825,stroke-width:1px
classDef expensive fill:#ffebee,stroke:#c62828,stroke-width:1px
classDef storage fill:#fce4ec,stroke:#ad1457,stroke-width:2px
classDef learning fill:#e0f7fa,stroke:#00838f,stroke-width:2px
classDef notbuilt fill:#ffcdd2,stroke:#b71c1c,stroke-width:3px,stroke-dasharray: 5 5
classDef dataforseo fill:#fff8e1,stroke:#ff8f00,stroke-width:2px
classDef admin fill:#e8eaf6,stroke:#3f51b5,stroke-width:2px
%% ==========================================================================
%% USER ACTIONS (React UI)
%% ==========================================================================
subgraph UserActions["USER ACTIONS - React UI"]
direction TB
ADD_ASSET_WEB["POST /api/assets<br/>type: website<br/><br/>Add Website Asset<br/>→ Domain onboarding<br/>→ Daily SERP tracking"]
ADD_ASSET_APP["POST /api/assets<br/>type: app<br/><br/>Add App Asset<br/>→ Creates app_subscription<br/>→ Daily app tracking"]
ADD_ASSET_LOCAL["POST /api/assets<br/>type: local_entity<br/><br/>Add Local Business<br/>→ Google Places autocomplete<br/>→ Captures CID + website<br/>→ Triggers website onboarding<br/>→ Daily local pack tracking"]
ADD_COMPETITOR["POST /api/competitors<br/><br/>Add to Watchlist<br/>→ Domain onboarding<br/>→ ❌ Daily summary NOT BUILT"]
ADD_KEYWORD["POST /api/keywords<br/><br/>Add Keyword to Project<br/>→ Creates keyword_subscription<br/>→ Daily SERP tracking"]
end
class ADD_ASSET_WEB,ADD_ASSET_APP,ADD_ASSET_LOCAL,ADD_COMPETITOR,ADD_KEYWORD userAction
%% ==========================================================================
%% ADMIN CONSOLE (Cloudflare Pages)
%% ==========================================================================
subgraph AdminConsole["ADMIN CONSOLE - Cloudflare Pages"]
direction TB
ADMIN_DASH["Dashboard<br/><br/>System KPIs<br/>Classification health<br/>Queue status"]
ADMIN_CLASS["Classification Pages<br/><br/>• Domains<br/>• URLs<br/>• Keywords<br/><br/>DataTable with sort/page/search<br/>Distribution charts"]
ADMIN_CORR["Corrections Page<br/><br/>✅ NEW: Negative Learning<br/>• Submit corrections<br/>• View patterns<br/>• Generate rules<br/>• Process pending"]
ADMIN_OPS["Operations Pages<br/><br/>• Costs tracking<br/>• Queue status<br/>• Budget alerts"]
ADMIN_DASH --> ADMIN_CLASS
ADMIN_CLASS --> ADMIN_CORR
ADMIN_DASH --> ADMIN_OPS
end
class ADMIN_DASH,ADMIN_CLASS,ADMIN_CORR,ADMIN_OPS admin
%% ==========================================================================
%% APP STORE CRAWLS
%% ==========================================================================
subgraph AppStores["APP STORE CRAWLS"]
direction LR
APPLE["Apple App Store<br/><br/>Chart Crawls<br/>FREE via RSS"]
GOOGLE["Google Play<br/><br/>DataForSEO<br/>App webhooks"]
end
class APPLE,GOOGLE entry
%% ==========================================================================
%% DATAFORSEO WEBHOOKS (Entry Points for Google Play)
%% ==========================================================================
subgraph DFSWebhooks["DATAFORSEO WEBHOOKS"]
direction TB
APP_LIST_WEBHOOK["dataforseo-app-list-webhook<br/><br/>POST /webhooks/dataforseo/app-list<br/>Receives category rankings<br/>Primary for Google Play<br/>Fallback for Apple"]
APP_INFO_WEBHOOK["dataforseo-app-info-webhook<br/><br/>POST /webhooks/dataforseo/app-info<br/>Receives full app details<br/>Updates apps + rankings"]
end
class APP_LIST_WEBHOOK,APP_INFO_WEBHOOK dataforseo
%% Webhooks flow to gatekeepers
APP_LIST_WEBHOOK --> ENSURE_URL
APP_INFO_WEBHOOK --> ENSURE_URL
APP_LIST_WEBHOOK -->|"queues"| APP_INFO_QUEUE
%% Google Play uses webhooks
GOOGLE --> APP_LIST_WEBHOOK
%% ==========================================================================
%% DOMAIN ONBOARDING (Weekly/Monthly)
%% ==========================================================================
subgraph Onboard["DOMAIN ONBOARDING - Weekly/Monthly"]
direction TB
ONBOARD_TRIGGER["triggerDomainOnboarding<br/><br/>Queues to DOMAIN_ONBOARD_QUEUE"]
ONBOARD_CONSUMER["domain-onboard-consumer<br/><br/>Processes new domains"]
subgraph DFS_Calls["DataForSEO API Calls"]
direction LR
DFS_BACKLINKS["Backlinks API<br/><br/>Fetch referring domains<br/>+ source URLs"]
DFS_RANKED["Ranked Keywords API<br/><br/>Keywords domain ranks for<br/>+ ranking URLs<br/><br/>Refresh: Weekly/Monthly"]
end
ONBOARD_TRIGGER --> ONBOARD_CONSUMER
ONBOARD_CONSUMER --> DFS_BACKLINKS
ONBOARD_CONSUMER --> DFS_RANKED
end
class ONBOARD_TRIGGER,ONBOARD_CONSUMER,DFS_BACKLINKS,DFS_RANKED dataforseo
%% ==========================================================================
%% LOCAL BUSINESS DETECTION
%% ==========================================================================
subgraph LocalBiz["LOCAL BUSINESS DETECTION"]
direction TB
LOCAL_DETECT["detectLocalBusiness()<br/><br/>Runs after onboarding complete<br/>Analyzes keyword locations<br/><br/>Signals:<br/>• Local intent ratio<br/>• Near me keywords<br/>• Service patterns<br/>• Location concentration"]
LOCAL_RESULT["Detection Result<br/><br/>is_local_business: true/false<br/>primary_location_id<br/>service_area_type<br/>confidence: 0-100"]
LOCAL_PROMPT["UI Suggestion<br/><br/>'Looks like you serve Austin, TX!'<br/>'Track your Google Business Profile?'<br/><br/>GET /local-business-suggestion"]
GBP_SEARCH["Google Places Autocomplete<br/><br/>User searches business name<br/>Selects their listing<br/>CID captured"]
LOCAL_ENTITY[("local_entities<br/><br/>cid, name, address<br/>lat, lng, location_id<br/>domain_id")]
LOCAL_DETECT --> LOCAL_RESULT
LOCAL_RESULT -->|"is_local = true"| LOCAL_PROMPT
LOCAL_PROMPT -->|"user clicks Find"| GBP_SEARCH
GBP_SEARCH --> LOCAL_ENTITY
end
class LOCAL_DETECT,LOCAL_RESULT free
class LOCAL_PROMPT,GBP_SEARCH userAction
class LOCAL_ENTITY storage
%% Onboarding complete triggers local business detection
ONBOARD_CONSUMER -->|"all jobs done"| LOCAL_DETECT
%% ==========================================================================
%% BRAND HIERARCHY (Cross-Channel Identity)
%% ==========================================================================
subgraph BrandUnify["BRAND HIERARCHY - Cross-Channel Identity"]
direction TB
BRAND_TABLE[("brands<br/><br/>• parent_brand_id → brands.id<br/> (Alphabet → Google)<br/>• primary_domain_id → domains.id<br/> (main display domain)<br/>• name, normalized_name")]
DOMAIN_OWNERSHIP[("domains.brand_id<br/><br/>Many-to-one ownership:<br/>• nike.com → Nike<br/>• shop.nike.com → Nike<br/>• nike.co.uk → Nike")]
BRAND_TABLE -->|"owns"| DOMAIN_OWNERSHIP
BRAND_TABLE -->|"parent_brand_id"| BRAND_TABLE
end
class BRAND_TABLE,DOMAIN_OWNERSHIP storage
%% User actions flow to onboarding
ADD_ASSET_WEB -->|"website"| ENSURE_DOMAIN
ADD_ASSET_WEB -->|"triggers onboarding"| ONBOARD_TRIGGER
ADD_ASSET_WEB -->|"creates"| KW_SUBS
ADD_ASSET_LOCAL -->|"Google Places"| LOCAL_ENTITY
ADD_ASSET_LOCAL -->|"website_url"| ENSURE_DOMAIN
ADD_ASSET_LOCAL -->|"triggers onboarding"| ONBOARD_TRIGGER
ADD_ASSET_LOCAL -->|"creates"| KW_SUBS
ADD_COMPETITOR -->|"watchlist domain"| ENSURE_DOMAIN
ADD_COMPETITOR -->|"triggers onboarding"| ONBOARD_TRIGGER
%% App asset creates subscription for daily tracking
ADD_ASSET_APP -->|"creates"| APP_SUBS
ADD_ASSET_APP -->|"website_url"| ENSURE_DOMAIN
%% Apps link to brands
APPS_TABLE[("apps<br/><br/>brand_id → brands<br/>website_url → domains")]
ADD_ASSET_APP --> APPS_TABLE
APPS_TABLE -->|"brand_id"| BRAND_TABLE
class APPS_TABLE storage
%% App stores flow to domain (developer URLs)
APPLE --> ENSURE_DOMAIN
GOOGLE --> ENSURE_DOMAIN
%% ==========================================================================
%% DAILY SUBSCRIPTIONS (Cron)
%% ==========================================================================
subgraph DailySubs["DAILY SUBSCRIPTIONS - subscription-scheduler.js"]
direction TB
CRON_DAILY["cron-daily.js<br/><br/>Runs daily 2AM Pacific<br/>handleDailySubscriptions()"]
KW_SUBS[("keyword_subscriptions<br/><br/>DAILY SERP tracking<br/>google_serp, google_local<br/><br/>Created when user adds<br/>keyword or asset")]
APP_SUBS[("app_subscriptions<br/><br/>DAILY app tracking<br/>apple_app_store, google_play<br/><br/>Created when user adds<br/>app asset")]
DOM_SUBS[("domain_subscriptions<br/><br/>❌ DAILY domain tracking<br/>ranked_keywords summary<br/>NOT BUILT - no consumer<br/><br/>Should track watchlist<br/>competitors daily")]
CRON_DAILY --> KW_SUBS
CRON_DAILY --> APP_SUBS
CRON_DAILY --> DOM_SUBS
end
class CRON_DAILY entry
class KW_SUBS,APP_SUBS storage
class DOM_SUBS notbuilt
%% ==========================================================================
%% SERP TRACKING (Daily)
%% ==========================================================================
subgraph SerpTracking["SERP TRACKING - Daily"]
direction TB
SERP_CONSUMER["serp-consumer<br/><br/>Fetch live SERP results<br/>Extract ranking URLs"]
SERP_QUEUE_CONSUMER["serp-queue-consumer<br/><br/>SERP_QUEUE<br/>Background SERP checks<br/>User-initiated keyword tracking<br/>Calls handleTrackRankings()"]
KEYWORDS_TABLE[("keywords table<br/><br/>User's tracked keywords<br/>per project")]
end
class SERP_CONSUMER,SERP_QUEUE_CONSUMER dataforseo
%% ==========================================================================
%% APP TRACKING (Daily)
%% ==========================================================================
subgraph AppTracking["APP TRACKING - Daily"]
direction TB
APP_DAILY["app-daily-refresh.js<br/><br/>Daily category crawls<br/>Rankings only (skip details)"]
APP_KEYWORD_CONSUMER["app-keyword-consumer<br/><br/>App keyword searches<br/>Apple scrape / Google DFS"]
APP_DETAILS_CONSUMER["app-details-consumer<br/><br/>Fetch app metadata<br/>Icon, rating, description"]
end
class APP_DAILY,APP_KEYWORD_CONSUMER,APP_DETAILS_CONSUMER dataforseo
%% ==========================================================================
%% APP CRAWL CONSUMERS (Category & Shelf)
%% ==========================================================================
subgraph AppCrawlConsumers["APP CRAWL CONSUMERS"]
direction TB
APP_CRAWL_CONSUMER["app-crawl-consumer<br/><br/>APP_CRAWL_QUEUE<br/>Handles crawl_category + fetch_app<br/>Rate limited: 4s Apple, 8s Google<br/>Routes to scrapers or DataForSEO"]
SHELF_CRAWL_CONSUMER["shelf-crawl-consumer<br/><br/>SHELF_CRAWL_QUEUE<br/>Processes shelf/room URLs<br/>Stories, rooms, inline shelves<br/>Saves to app_category_rankings"]
APP_INFO_QUEUE[["APP_INFO_QUEUE<br/><br/>Queues app detail fetches<br/>→ DataForSEO app/info"]]
end
class APP_CRAWL_CONSUMER,SHELF_CRAWL_CONSUMER dataforseo
class APP_INFO_QUEUE queue
%% App crawl flows
APPLE -->|"groupings, charts"| APP_CRAWL_CONSUMER
APP_CRAWL_CONSUMER -->|"discovers shelves"| SHELF_CRAWL_CONSUMER
APP_CRAWL_CONSUMER -->|"queues details"| APP_INFO_QUEUE
SHELF_CRAWL_CONSUMER -->|"queues details"| APP_INFO_QUEUE
APP_INFO_QUEUE --> APP_INFO_WEBHOOK
%% ==========================================================================
%% SOCIAL SCRAPE CONSUMER (URL Enrichment)
%% ==========================================================================
subgraph SocialScrape["SOCIAL SCRAPE - Daily Batches"]
direction TB
SOCIAL_SCRAPE_CONSUMER["social-scrape-consumer<br/><br/>SOCIAL_SCRAPE_QUEUE<br/>Scrapes social accounts<br/>from developer domains<br/><br/>Daily batches on URLs table<br/>Saves to social_accounts"]
SOCIAL_ACCOUNTS[("social_accounts<br/><br/>twitter, linkedin, facebook<br/>instagram, youtube, etc.<br/>Linked to domain_id")]
end
class SOCIAL_SCRAPE_CONSUMER cheap
class SOCIAL_ACCOUNTS storage
%% Social scrape flows from domain onboarding
ONBOARD_CONSUMER -->|"queues domains"| SOCIAL_SCRAPE_CONSUMER
SOCIAL_SCRAPE_CONSUMER --> SOCIAL_ACCOUNTS
%% Subscriptions trigger consumers
KW_SUBS -->|"queues"| SERP_CONSUMER
APP_SUBS -->|"queues"| APP_KEYWORD_CONSUMER
APP_SUBS -->|"queues"| APP_DETAILS_CONSUMER
DOM_SUBS -.->|"❌ no consumer"| DOMAIN_TRACKING_CONSUMER
%% User adds keyword → creates subscription
ADD_KEYWORD -->|"creates"| KW_SUBS
ADD_KEYWORD -->|"store keyword"| KEYWORDS_TABLE
%% Watchlist SHOULD create domain subscription but daily tracking not built
ADD_COMPETITOR -.->|"❌ should create"| DOM_SUBS
%% Domain tracking consumer NOT BUILT
DOMAIN_TRACKING_CONSUMER["domain-tracking-consumer<br/><br/>❌ NOT BUILT<br/><br/>Should call:<br/>• ranked_keywords summary<br/>• Track new/lost keywords<br/>• Backlinks summary"]
class DOMAIN_TRACKING_CONSUMER notbuilt
%% ==========================================================================
%% GATEKEEPERS (Domain-First Enforcement)
%% ==========================================================================
subgraph Gates["ENTRY GATEKEEPERS - Domain-First"]
direction TB
ENSURE_DOMAIN["ensureDomain()<br/><br/>url-management.js<br/>Create/update domain record<br/>Check: confidence >= 60%?<br/>NO → Queue for classification"]
ENSURE_URL["ensureUrl()<br/><br/>url-management.js<br/>✅ Calls ensureDomain FIRST<br/>Wait for domain classification<br/>Then classify URL"]
end
class ENSURE_DOMAIN,ENSURE_URL gate
%% Backlinks produce URLs
DFS_BACKLINKS -->|"source URLs"| ENSURE_URL
%% Ranked Keywords produces BOTH keywords AND URLs
DFS_RANKED -->|"ranking URLs"| ENSURE_URL
DFS_RANKED -->|"keywords"| STORE_RKW["storeRankedKeywords()<br/><br/>domain-onboard-consumer.js<br/>Batch upsert keywords<br/>Rules classification"]
STORE_RKW --> KEYWORDS_DB2
STORE_RKW -->|"needs semantic dims"| KEYWORD_Q
%% SERP results produce URLs
SERP_CONSUMER -->|"SERP result URLs"| ENSURE_URL
%% ensureUrl cascades to ensureDomain (DOMAIN-FIRST)
ENSURE_URL -->|"domain-first"| ENSURE_DOMAIN
class STORE_RKW gate
%% ==========================================================================
%% QUEUES
%% ==========================================================================
subgraph Queues["CLASSIFICATION QUEUES"]
direction TB
DOMAIN_Q[["DOMAIN_CLASSIFY_QUEUE<br/>domain-classify-consumer"]]
URL_Q[["URL_CLASSIFY_QUEUE<br/>named: BACKLINK_CLASSIFY_QUEUE<br/>backlink-classify-consumer<br/><br/>Handles ALL URL types:<br/>• Backlink source URLs<br/>• Ranking URLs<br/>• SERP result URLs<br/>• App store URLs"]]
KEYWORD_Q[["KEYWORD_CLASSIFY_QUEUE<br/>keyword-classify-consumer<br/><br/>Background keyword<br/>classification"]]
end
class DOMAIN_Q,URL_Q,KEYWORD_Q queue
ENSURE_DOMAIN -->|"conf < 60%"| DOMAIN_Q
ENSURE_URL -->|"needs classification"| URL_Q
KEYWORDS_TABLE -->|"needs classification"| KEYWORD_Q
%% ==========================================================================
%% DOMAIN CLASSIFICATION PIPELINE
%% ==========================================================================
subgraph DomainPipe["DOMAIN CLASSIFICATION PIPELINE"]
direction TB
D0["STAGE 0: Cache<br/>Check domains DB<br/>FREE"]
D1["STAGE 1: Rules Engine<br/>7,100+ curated domains<br/>TLD rules, platform detection<br/>FREE"]
D15["STAGE 1.5: Google Ads Categories<br/>DataForSEO cached hints<br/>60+ path mappings<br/>FREE"]
D2["STAGE 2: Vectorize<br/>Embed domain + metadata<br/>Query similar, vote on type<br/>FREE"]
D3["STAGE 3: Low-Noise Crawl<br/>HEAD + partial GET 8KB<br/>Extract head metadata<br/>FREE"]
D4["STAGE 4: Instant Pages<br/>DataForSEO full page<br/>$0.000125/call"]
D45["STAGE 4.5: Domain Patterns<br/>VPN, carriers, fitness<br/>Additional pattern matching<br/>FREE"]
D5["STAGE 5: LLM<br/>Llama 3.3 70B<br/>Tree: tier1 → domain_type<br/>~$0.0001/call"]
D6["STAGE 6: Store & Learn<br/>Normalize to V3<br/>Update domains table<br/>✅ Vectorize ALL sources >= 80%"]
D0 -->|"MISS"| D1
%% NOTE(review): D1/D2 previously escalated at "< 70%" while early-exiting only
%% at ">= 80%", leaving the 70-79% band with no outgoing edge; D15 had an
%% escalation edge but no early-exit edge at all, stranding results >= 70%.
%% Each escalation is now the exact complement of that stage's early-exit
%% threshold — confirm the authoritative values against domain-classify-consumer.
D1 -->|"< 80%"| D15
D15 -->|"< 70%"| D2
D2 -->|"< 80%"| D3
D3 -->|"< 70%"| D4
D4 -->|"< 70%"| D45
D45 -->|"< 70%"| D5
D5 --> D6
D0 -->|"HIT >= 60%"| DDONE["Done"]
D1 -->|">= 80%"| D6
D15 -->|">= 70%"| D6
D2 -->|">= 80%"| D6
D3 -->|">= 70%"| D6
D4 -->|">= 70%"| D6
D45 -->|">= 70%"| D6
end
class D0,D1,D15,D2,D3,D45 free
class D4 cheap
class D5 expensive
DOMAIN_Q --> D0
%% ==========================================================================
%% URL CLASSIFICATION PIPELINE
%% ==========================================================================
subgraph UrlPipe["URL CLASSIFICATION PIPELINE"]
direction TB
U1["STAGE 1: Rules Engine<br/>15+ rules per dimension<br/>URL path patterns<br/>278 tactic patterns<br/>FREE"]
U15["STAGE 1.5: Location Detection<br/>225K google_locations<br/>Path, subdomain, query params<br/>FREE"]
U2["STAGE 2: Vectorize<br/>Query similar URLs<br/>Vote per dimension<br/>FREE"]
U3["STAGE 3: Content Parser<br/>Fetch hierarchy<br/>Schema, word count<br/>$0-$0.001"]
U4["STAGE 4: LLM<br/>Llama 3.3 70B<br/>Per-dimension reasoning<br/>~$0.0001/call"]
U5["STAGE 5: Store & Learn<br/>Update urls table<br/>✅ Vectorize >= 65%<br/>✅ NO bubble-up (removed)"]
U1 --> U15
%% NOTE(review): U15/U2/U3 previously escalated at "< 70%" while early-exiting
%% only at ">= 80%", leaving the 70-79% band with no path through the pipeline.
%% Escalations now complement the early-exit thresholds — confirm the
%% authoritative values against backlink-classify-consumer.
U15 -->|"any dim < 80%"| U2
U2 -->|"< 80%"| U3
U3 -->|"< 80%"| U4
U4 --> U5
U15 -->|"all dims >= 80%"| U5
U2 -->|">= 80%"| U5
U3 -->|">= 80%"| U5
end
class U1,U15,U2 free
class U3 cheap
class U4 expensive
URL_Q --> U1
%% ==========================================================================
%% KEYWORD CLASSIFICATION PIPELINE
%% ==========================================================================
subgraph KeywordPipe["KEYWORD CLASSIFICATION PIPELINE"]
direction TB
K1["STAGE 1: Rules Engine<br/>20+ dimension classifiers<br/>Brand detection from DB<br/>FREE"]
K15["STAGE 1.5: Location Detection<br/>225K google_locations<br/>City, State, Country, Zip<br/>FREE"]
K2["STAGE 2: Vectorize<br/>30 seed examples<br/>Query similar keywords<br/>FREE"]
K3["STAGE 3: LLM (3 calls)<br/>• Call 1: Journey & Intent<br/>• Call 2: Buyer Context<br/>• Call 3: Topic Analysis<br/>~$0.0003/keyword"]
K4["STAGE 4: Store & Learn<br/>Save to keywords table<br/>✅ Vectorize >= 70%"]
K1 --> K15 --> K2
K2 -->|"any dim < 70%"| K3
K2 -->|"all dims >= 70%"| K4
K3 --> K4
end
class K1,K15,K2 free
class K3 expensive
class K4 learning
KEYWORD_Q --> K1
%% ==========================================================================
%% LLM VERIFY QUEUE (Automated)
%% ==========================================================================
subgraph LLMVerify["LLM VERIFY QUEUE (Automated)"]
direction TB
LLM_VERIFY_Q[["LLM_VERIFY_QUEUE<br/><br/>Automated re-verification<br/>for low-confidence URLs"]]
LLM_VERIFY_CONSUMER["llm-verify-consumer<br/><br/>Enhanced LLM verification<br/>NOT manual review"]
LLM_VERIFY_Q --> LLM_VERIFY_CONSUMER
end
class LLM_VERIFY_Q,LLM_VERIFY_CONSUMER queue
U4 -->|"needs verification"| LLM_VERIFY_Q
%% ==========================================================================
%% NEGATIVE LEARNING SYSTEM (NEW)
%% ==========================================================================
subgraph NegativeLearning["NEGATIVE LEARNING SYSTEM ✅ NEW"]
direction TB
CORR_TABLE[("classification_corrections<br/><br/>entity_type, entity_id<br/>original_dimension<br/>original_value → corrected_value<br/>confidence_before<br/>created_by, notes")]
PATTERN_TABLE[("correction_patterns<br/><br/>Aggregated corrections<br/>entity_type, dimension<br/>from_value → to_value<br/>count, suggested_rule")]
CORR_MODULE["classification-corrections.js<br/><br/>• recordCorrection()<br/>• getPendingCorrections()<br/>• getSuggestedRules()<br/>• processPendingCorrections()"]
CORR_API["Correction Endpoints<br/><br/>POST /corrections<br/>GET /corrections/stats<br/>GET /corrections/patterns<br/>POST /corrections/learn"]
CORR_TABLE --> PATTERN_TABLE
CORR_MODULE --> CORR_TABLE
CORR_MODULE --> PATTERN_TABLE
CORR_API --> CORR_MODULE
end
class CORR_TABLE,PATTERN_TABLE storage
class CORR_MODULE,CORR_API learning
%% Admin console connects to corrections
ADMIN_CORR --> CORR_API
%% Corrections feed back to Vectorize
CORR_MODULE -->|"processed"| LEARN_D
CORR_MODULE -->|"processed"| LEARN_U
CORR_MODULE -->|"processed"| LEARN_K
%% ==========================================================================
%% LOCATION DETECTION
%% ==========================================================================
subgraph LocationDetect["LOCATION DETECTION"]
direction TB
LOC_DB[("google_locations<br/>225,840 rows<br/><br/>Types:<br/>Country, State, City<br/>Neighborhood, Airport<br/>Municipality, etc.")]
LOC_KW["Keyword Location<br/><br/>Pattern: 'plumber austin'<br/>→ location_id: 1026339<br/>→ location_type: City<br/>→ location_name: Austin, TX"]
LOC_URL["URL Location<br/><br/>Path: /locations/austin-tx<br/>Subdomain: austin.example.com<br/>Query: ?city=austin"]
LOC_DB --> LOC_KW
LOC_DB --> LOC_URL
end
class LOC_DB storage
class LOC_KW,LOC_URL free
LOC_KW -.->|"used by"| K15
LOC_URL -.->|"used by"| U15
%% ==========================================================================
%% SELF-LEARNING LOOP
%% ==========================================================================
subgraph Learning["VECTORIZE SELF-LEARNING ✅ ALL PIPELINES"]
direction TB
VECT_DOMAIN[("domain-classifier<br/>7,100+ seeded<br/>+ learned from ALL sources")]
VECT_URL[("backlink-classifier<br/>URL patterns<br/>+ learned >= 65%")]
VECT_KW[("keyword-classifier<br/>30 seeded<br/>+ learned >= 70%")]
LEARN_D["Domain Learn<br/>✅ ALL sources >= 80%<br/>(rules + LLM)"]
LEARN_U["URL Learn<br/>conf >= 65% → upsert"]
LEARN_K["Keyword Learn<br/>conf >= 70% → upsert"]
end
class VECT_DOMAIN,VECT_URL,VECT_KW,LEARN_D,LEARN_U,LEARN_K learning
D6 -->|">= 80%"| LEARN_D
U5 -->|">= 65%"| LEARN_U
K4 -->|">= 70%"| LEARN_K
LEARN_D --> VECT_DOMAIN
LEARN_U --> VECT_URL
LEARN_K --> VECT_KW
VECT_DOMAIN -.->|"query"| D2
VECT_URL -.->|"query"| U2
VECT_KW -.->|"query"| K2
%% ==========================================================================
%% STORAGE
%% ==========================================================================
subgraph Storage["D1 STORAGE"]
direction LR
DOMAINS_DB[("domains<br/>tier1_type<br/>domain_type<br/>is_local_business<br/>primary_location_id<br/>confidence")]
URLS_DB[("urls<br/>page_type<br/>tactic_type<br/>channel_bucket<br/>location_id")]
KEYWORDS_DB2[("keywords<br/>journey_moment<br/>buyer_behavior<br/>role_context<br/>location_id<br/>+ 25 dimensions")]
end
class DOMAINS_DB,URLS_DB,KEYWORDS_DB2 storage
D6 --> DOMAINS_DB
U5 --> URLS_DB
K4 --> KEYWORDS_DB2
%% Admin console reads from storage
ADMIN_CLASS --> DOMAINS_DB
ADMIN_CLASS --> URLS_DB
ADMIN_CLASS --> KEYWORDS_DB2