[
  {
    "id": "nvidia-h100-sxm",
    "name": "NVIDIA H100 SXM5",
    "manufacturer": "NVIDIA",
    "architecture": "Hopper",
    "memory": "80GB HBM3",
    "memory_bandwidth": "3.35 TB/s",
    "fp16_tflops": 1979,
    "fp8_tflops": 3958,
    "interconnect": "NVLink4 (900GB/s)",
    "tpb_watt": 700,
    "released": "2022-10-01",
    "status": "Shipping (allocation constrained)",
    "pricing": {
      "retail_usd": "30000-40000",
      "cloud_per_hour": {
        "aws_p5_xlarge": 12.99,
        "azure_nd96asr_v4": 11.91,
        "gcp_a3_highgpu": 11.8,
        "coreweave": 2.23
      }
    },
    "availability": "Constrained — major allocations to hyperscalers and AI labs",
    "tags": [
      "training",
      "inference",
      "flagship"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "nvidia-h100-pcie",
    "name": "NVIDIA H100 PCIe",
    "manufacturer": "NVIDIA",
    "architecture": "Hopper",
    "memory": "80GB HBM3",
    "memory_bandwidth": "2.0 TB/s",
    "fp16_tflops": 1513,
    "fp8_tflops": 3026,
    "interconnect": "PCIe Gen5 (128GB/s)",
    "tpb_watt": 350,
    "released": "2023-04-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "25000-35000",
      "cloud_per_hour": {
        "coreweave": 1.99,
        "runpod": 1.98
      }
    },
    "availability": "Available — lower-power variant for inference and smaller training clusters",
    "tags": [
      "training",
      "inference",
      "pcie"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-h200-sxm",
    "name": "NVIDIA H200 SXM5",
    "manufacturer": "NVIDIA",
    "architecture": "Hopper (refresh)",
    "memory": "141GB HBM3e",
    "memory_bandwidth": "4.8 TB/s",
    "fp16_tflops": 1979,
    "fp8_tflops": 3958,
    "interconnect": "NVLink4 (900GB/s)",
    "tpb_watt": 700,
    "released": "2024-11-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "35000-45000",
      "cloud_per_hour": {
        "coreweave": 2.49,
        "lambda_labs": 2.49
      }
    },
    "availability": "Available via neoclouds; hyperscaler rollouts ongoing",
    "tags": [
      "training",
      "inference",
      "flagship",
      "hbm3e"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "nvidia-h200-pcie",
    "name": "NVIDIA H200 PCIe",
    "manufacturer": "NVIDIA",
    "architecture": "Hopper (refresh)",
    "memory": "141GB HBM3e",
    "memory_bandwidth": "4.0 TB/s",
    "fp16_tflops": 1513,
    "fp8_tflops": 3026,
    "interconnect": "PCIe Gen5 (128GB/s)",
    "tpb_watt": 350,
    "released": "2025-04-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "30000-40000",
      "cloud_per_hour": {
        "coreweave": 2.1,
        "runpod": 2.15
      }
    },
    "availability": "Available — inference-focused variant with high memory capacity",
    "tags": [
      "inference",
      "hbm3e",
      "pcie"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-b200",
    "name": "NVIDIA B200",
    "manufacturer": "NVIDIA",
    "architecture": "Blackwell",
    "memory": "192GB HBM3e",
    "memory_bandwidth": "8.0 TB/s",
    "fp16_tflops": 2250,
    "fp8_tflops": 4500,
    "fp4_tflops": 9000,
    "interconnect": "NVLink5 (1.8TB/s)",
    "tpb_watt": 1000,
    "released": "2025-03-01",
    "status": "Shipping (limited initial allocation)",
    "pricing": {
      "retail_usd": "40000-60000",
      "cloud_per_hour": {
        "coreweave": 3.5
      }
    },
    "availability": "Ramping — Q2 2026 volume increasing; GB200 systems shipping",
    "tags": [
      "training",
      "inference",
      "flagship",
      "blackwell",
      "fp4"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "nvidia-b100",
    "name": "NVIDIA B100",
    "manufacturer": "NVIDIA",
    "architecture": "Blackwell",
    "memory": "192GB HBM3e",
    "memory_bandwidth": "8.0 TB/s",
    "fp16_tflops": 1800,
    "fp8_tflops": 3600,
    "fp4_tflops": 7200,
    "interconnect": "NVLink5 (1.8TB/s)",
    "tpb_watt": 700,
    "released": "2025-06-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "30000-45000",
      "cloud_per_hour": {
        "coreweave": 2.8
      }
    },
    "availability": "Available — lower-power Blackwell variant for mainstream training and inference",
    "tags": [
      "training",
      "inference",
      "blackwell",
      "fp4"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-gb200-nvl72",
    "name": "NVIDIA GB200 NVL72",
    "manufacturer": "NVIDIA",
    "architecture": "Blackwell (Grace+Blackwell)",
    "memory": "13.8TB HBM3e (72 GPU system)",
    "memory_bandwidth": "8.0 TB/s per GPU",
    "fp16_tflops": 162000,
    "fp8_tflops": 324000,
    "fp4_tflops": 648000,
    "interconnect": "NVLink5 (1.8TB/s) + NVSwitch",
    "tpb_watt": "27000 (system)",
    "released": "2025-06-01",
    "status": "Shipping to select customers",
    "pricing": {
      "retail_usd": "3000000-3500000 (per NVL72 rack)",
      "cloud_per_hour": {
        "coreweave": "TBD"
      }
    },
    "availability": "Limited — Dell, Supermicro, Cisco shipping systems; volume ramp H2 2026",
    "tags": [
      "training",
      "frontier",
      "blackwell",
      "grace",
      "rack-scale"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "nvidia-gb200-nvl36",
    "name": "NVIDIA GB200 NVL36",
    "manufacturer": "NVIDIA",
    "architecture": "Blackwell (Grace+Blackwell)",
    "memory": "6.9TB HBM3e (36 GPU system)",
    "memory_bandwidth": "8.0 TB/s per GPU",
    "fp16_tflops": 81000,
    "fp8_tflops": 162000,
    "fp4_tflops": 324000,
    "interconnect": "NVLink5 (1.8TB/s) + NVSwitch",
    "tpb_watt": "13500 (system)",
    "released": "2025-09-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "1500000-1800000 (per NVL36 rack)",
      "cloud_per_hour": {
        "coreweave": "TBD"
      }
    },
    "availability": "Available — smaller rack-scale system for mid-size deployments",
    "tags": [
      "training",
      "inference",
      "blackwell",
      "grace",
      "rack-scale"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-l40s",
    "name": "NVIDIA L40S",
    "manufacturer": "NVIDIA",
    "architecture": "Ada Lovelace",
    "memory": "48GB GDDR6",
    "memory_bandwidth": "0.864 TB/s",
    "fp16_tflops": 362,
    "fp8_tflops": 733,
    "interconnect": "PCIe Gen4",
    "tpb_watt": 350,
    "released": "2023-08-01",
    "status": "Shipping (widely available)",
    "pricing": {
      "retail_usd": "8000-12000",
      "cloud_per_hour": {
        "coreweave": 0.8,
        "runpod": 0.79
      }
    },
    "availability": "Widely available — primary choice for inference workloads",
    "tags": [
      "inference",
      "available",
      "mid-range"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "nvidia-l40",
    "name": "NVIDIA L40",
    "manufacturer": "NVIDIA",
    "architecture": "Ada Lovelace",
    "memory": "48GB GDDR6",
    "memory_bandwidth": "0.864 TB/s",
    "fp16_tflops": 181,
    "fp8_tflops": 362,
    "interconnect": "PCIe Gen4",
    "tpb_watt": 300,
    "released": "2022-10-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "6000-9000",
      "cloud_per_hour": {
        "runpod": 0.65
      }
    },
    "availability": "Available — graphics and inference workloads, lower FP8 than L40S",
    "tags": [
      "inference",
      "available",
      "mid-range"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-l4",
    "name": "NVIDIA L4",
    "manufacturer": "NVIDIA",
    "architecture": "Ada Lovelace",
    "memory": "24GB GDDR6",
    "memory_bandwidth": "0.300 TB/s",
    "fp16_tflops": 121,
    "fp8_tflops": 242,
    "interconnect": "PCIe Gen4",
    "tpb_watt": 72,
    "released": "2023-01-01",
    "status": "Shipping (widely available)",
    "pricing": {
      "retail_usd": "2000-3500",
      "cloud_per_hour": {
        "gcp_t4_l4": 0.75,
        "runpod": 0.49
      }
    },
    "availability": "Widely available — low-power inference GPU for edge and cloud",
    "tags": [
      "inference",
      "available",
      "low-power",
      "edge"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-a100-sxm",
    "name": "NVIDIA A100 SXM4 80GB",
    "manufacturer": "NVIDIA",
    "architecture": "Ampere",
    "memory": "80GB HBM2e",
    "memory_bandwidth": "2.0 TB/s",
    "fp16_tflops": 624,
    "fp8_tflops": "N/A",
    "interconnect": "NVLink3 (600GB/s)",
    "tpb_watt": 400,
    "released": "2021-06-01",
    "status": "Shipping (mature, widely available)",
    "pricing": {
      "retail_usd": "10000-16000",
      "cloud_per_hour": {
        "aws_p4d": 3.22,
        "gcp_a2_megagpu": 3.67,
        "runpod": 0.75
      }
    },
    "availability": "Widely available — decreasing as Hopper/Blackwell ramp",
    "tags": [
      "training",
      "inference",
      "legacy",
      "available"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "nvidia-a100-pcie",
    "name": "NVIDIA A100 PCIe 80GB",
    "manufacturer": "NVIDIA",
    "architecture": "Ampere",
    "memory": "80GB HBM2e",
    "memory_bandwidth": "1.94 TB/s",
    "fp16_tflops": 312,
    "fp8_tflops": "N/A",
    "interconnect": "PCIe Gen4 (64GB/s)",
    "tpb_watt": 300,
    "released": "2021-06-01",
    "status": "Shipping (widely available)",
    "pricing": {
      "retail_usd": "8000-14000",
      "cloud_per_hour": {
        "runpod": 0.65,
        "vast_ai": 0.55
      }
    },
    "availability": "Widely available — PCIe variant for inference and smaller clusters",
    "tags": [
      "inference",
      "legacy",
      "available",
      "pcie"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-a30",
    "name": "NVIDIA A30",
    "manufacturer": "NVIDIA",
    "architecture": "Ampere",
    "memory": "24GB HBM2",
    "memory_bandwidth": "0.933 TB/s",
    "fp16_tflops": 165,
    "fp8_tflops": "N/A",
    "interconnect": "NVLink3 (600GB/s)",
    "tpb_watt": 165,
    "released": "2021-04-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "3000-5000",
      "cloud_per_hour": {}
    },
    "availability": "Available — mainstream inference GPU, lower cost than A100",
    "tags": [
      "inference",
      "available",
      "mid-range"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-a10",
    "name": "NVIDIA A10",
    "manufacturer": "NVIDIA",
    "architecture": "Ampere",
    "memory": "24GB GDDR6",
    "memory_bandwidth": "0.600 TB/s",
    "fp16_tflops": 125,
    "fp8_tflops": "N/A",
    "interconnect": "PCIe Gen4",
    "tpb_watt": 150,
    "released": "2021-04-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "2500-4000",
      "cloud_per_hour": {
        "runpod": 0.4
      }
    },
    "availability": "Available — inference and visualization GPU",
    "tags": [
      "inference",
      "available",
      "mid-range"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-t4",
    "name": "NVIDIA T4",
    "manufacturer": "NVIDIA",
    "architecture": "Turing",
    "memory": "16GB GDDR6",
    "memory_bandwidth": "0.320 TB/s",
    "fp16_tflops": 65,
    "fp8_tflops": "N/A",
    "interconnect": "PCIe Gen3",
    "tpb_watt": 70,
    "released": "2018-09-01",
    "status": "Shipping (legacy)",
    "pricing": {
      "retail_usd": "1000-2000",
      "cloud_per_hour": {
        "aws_g4dn": 0.526,
        "gcp_t4": 0.35
      }
    },
    "availability": "Widely available — entry-level inference, being phased out for L4",
    "tags": [
      "inference",
      "legacy",
      "low-power",
      "edge"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-v100",
    "name": "NVIDIA V100 SXM2",
    "manufacturer": "NVIDIA",
    "architecture": "Volta",
    "memory": "32GB HBM2",
    "memory_bandwidth": "0.900 TB/s",
    "fp16_tflops": 125,
    "fp8_tflops": "N/A",
    "interconnect": "NVLink2 (300GB/s)",
    "tpb_watt": 300,
    "released": "2017-06-01",
    "status": "End of life (still deployed)",
    "pricing": {
      "retail_usd": "2000-5000 (used)",
      "cloud_per_hour": {
        "vast_ai": 0.3
      }
    },
    "availability": "Legacy — still available on secondary market and some clouds; not recommended for new deployments",
    "tags": [
      "training",
      "inference",
      "legacy",
      "eol"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-b300",
    "name": "NVIDIA B300",
    "manufacturer": "NVIDIA",
    "architecture": "Blackwell Ultra",
    "memory": "288GB HBM3e",
    "memory_bandwidth": "8.0 TB/s",
    "fp16_tflops": 2500,
    "fp8_tflops": 5000,
    "fp4_tflops": 10000,
    "interconnect": "NVLink5 (1.8TB/s)",
    "tpb_watt": 1200,
    "released": "2026-05-01",
    "status": "Shipping (HGX B300 available now)",
    "pricing": {
      "retail_usd": "TBD",
      "cloud_per_hour": {
        "coreweave": "TBD"
      }
    },
    "availability": "Shipping now — HGX B300 platform in production. Current high-end shipping GPU, successor to B200 with 50% more HBM3e memory (288GB vs 192GB). Increased compute density for frontier-scale training.",
    "tags": [
      "training",
      "inference",
      "flagship",
      "blackwell-ultra",
      "fp4",
      "hbm3e"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-vera-rubin",
    "name": "NVIDIA Vera Rubin",
    "manufacturer": "NVIDIA",
    "architecture": "Vera Rubin",
    "memory": "TBD (expected 288GB+ HBM4)",
    "memory_bandwidth": "TBD (expected 13+ TB/s)",
    "fp16_tflops": "TBD",
    "fp8_tflops": "TBD",
    "fp4_tflops": "TBD (expected 40+ PFLOPS)",
    "interconnect": "NVLink6 (expected)",
    "tpb_watt": "TBD (expected 1400W+)",
    "released": "2026-H2 (expected)",
    "status": "Announced — shipping H2 2026 to hyperscalers only",
    "pricing": {
      "retail_usd": "TBD",
      "cloud_per_hour": {}
    },
    "availability": "Announced at GTC 2026. Renamed from 'Rubin' to 'Vera Rubin' at GTC 2026. Ships H2 2026 to hyperscalers ONLY (no general availability expected in 2026). Next-generation architecture succeeding Blackwell, featuring HBM4 memory.",
    "tags": [
      "training",
      "inference",
      "flagship",
      "vera-rubin",
      "upcoming",
      "hbm4"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "nvidia-rubin-ultra",
    "name": "NVIDIA Rubin Ultra",
    "manufacturer": "NVIDIA",
    "architecture": "Rubin Ultra",
    "memory": "TBD (expected 576GB+ HBM4e)",
    "memory_bandwidth": "TBD (expected 13+ TB/s)",
    "fp16_tflops": "TBD",
    "fp8_tflops": "TBD",
    "fp4_tflops": "TBD (expected 50+ PFLOPS)",
    "interconnect": "NVLink6 (expected)",
    "tpb_watt": "TBD (expected 1800W+)",
    "released": "2027-Q3 (expected)",
    "status": "Announced (not yet shipping)",
    "pricing": {
      "retail_usd": "TBD",
      "cloud_per_hour": {}
    },
    "availability": "Not yet available — announced at GTC 2025. Rubin Ultra is the follow-on to Vera Rubin, expected to push the 1-exaflop-per-rack boundary.",
    "tags": [
      "training",
      "inference",
      "flagship",
      "rubin",
      "upcoming",
      "hbm4e"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "amd-mi300x",
    "name": "AMD Instinct MI300X",
    "manufacturer": "AMD",
    "architecture": "CDNA 3",
    "memory": "192GB HBM3",
    "memory_bandwidth": "5.3 TB/s",
    "fp16_tflops": 1307,
    "fp8_tflops": 2614,
    "interconnect": "Infinity Fabric (896GB/s)",
    "tpb_watt": 750,
    "released": "2023-12-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "15000-20000",
      "cloud_per_hour": {
        "coreweave": 1.99
      }
    },
    "availability": "Available — gaining adoption; Meta, Microsoft, Oracle deploying",
    "tags": [
      "training",
      "inference",
      "amd",
      "alternative"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "amd-mi300a",
    "name": "AMD Instinct MI300A (APU)",
    "manufacturer": "AMD",
    "architecture": "CDNA 3",
    "memory": "128GB HBM3",
    "memory_bandwidth": "5.3 TB/s",
    "fp16_tflops": 1307,
    "fp8_tflops": 2614,
    "interconnect": "Infinity Fabric (896GB/s)",
    "tpb_watt": 760,
    "released": "2023-12-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "TBD (system-level)",
      "cloud_per_hour": {}
    },
    "availability": "Available — CPU+GPU APU used in El Capitan supercomputer (Lawrence Livermore)",
    "tags": [
      "training",
      "amd",
      "apu",
      "supercomputer",
      "alternative"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "amd-mi350",
    "name": "AMD Instinct MI350",
    "manufacturer": "AMD",
    "architecture": "CDNA 4 (3nm)",
    "memory": "288GB HBM3E",
    "memory_bandwidth": "8.0 TB/s",
    "fp16_tflops": 5000,
    "fp8_tflops": 10000,
    "interconnect": "Infinity Fabric (UPI)",
    "tpb_watt": 1000,
    "released": "2025-06-01",
    "status": "Shipping (limited initial allocation)",
    "pricing": {
      "retail_usd": "TBD",
      "cloud_per_hour": {}
    },
    "availability": "Ramping — 185 billion transistors, 3nm process. Direct B200 competitor. Early deployments at Meta, Microsoft.",
    "tags": [
      "training",
      "inference",
      "amd",
      "alternative",
      "flagship",
      "cdna4"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "amd-mi325x",
    "name": "AMD Instinct MI325X",
    "manufacturer": "AMD",
    "architecture": "CDNA 3 (refresh)",
    "memory": "288GB HBM3E",
    "memory_bandwidth": "6.0 TB/s",
    "fp16_tflops": 1307,
    "fp8_tflops": 2614,
    "interconnect": "Infinity Fabric (896GB/s)",
    "tpb_watt": 750,
    "released": "2024-11-01",
    "status": "Shipping",
    "pricing": {
      "retail_usd": "TBD",
      "cloud_per_hour": {
        "coreweave": 2.49
      }
    },
    "availability": "Available — MI300X refresh with 288GB HBM3E for large-model inference. Competitive with H200 on memory.",
    "tags": [
      "training",
      "inference",
      "amd",
      "alternative",
      "hbm3e"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "amd-mi400",
    "name": "AMD Instinct MI400 (Announced)",
    "manufacturer": "AMD",
    "architecture": "CDNA 5 (next-gen)",
    "memory": "TBD (expected 384GB+ HBM4)",
    "memory_bandwidth": "TBD (expected 10+ TB/s)",
    "fp16_tflops": "TBD",
    "fp8_tflops": "TBD",
    "interconnect": "Infinity Fabric (next-gen)",
    "tpb_watt": "TBD (expected 1200W+)",
    "released": "2027-Q2 (expected)",
    "status": "Announced (not yet shipping)",
    "pricing": {
      "retail_usd": "TBD",
      "cloud_per_hour": {}
    },
    "availability": "Not yet available — announced as Rubin competitor. Expected to feature HBM4 and compete with NVIDIA's next-gen Blackwell/Rubin.",
    "tags": [
      "training",
      "inference",
      "amd",
      "alternative",
      "flagship",
      "upcoming"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "intel-gaudi3",
    "name": "Intel Gaudi 3",
    "manufacturer": "Intel",
    "architecture": "Habana",
    "memory": "128GB HBM2e",
    "memory_bandwidth": "3.2 TB/s",
    "fp16_tflops": 1835,
    "fp8_tflops": 1835,
    "interconnect": "24x 200G RoCEv2",
    "tpb_watt": 600,
    "released": "2024-09-01",
    "status": "Shipping (limited)",
    "pricing": {
      "retail_usd": "125000 (8-GPU kit)",
      "cloud_per_hour": {}
    },
    "availability": "Limited — IBM Cloud and AWS announced; adoption unclear",
    "tags": [
      "training",
      "inference",
      "intel",
      "alternative",
      "limited"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "intel-gaudi2",
    "name": "Intel Gaudi 2",
    "manufacturer": "Intel",
    "architecture": "Habana",
    "memory": "96GB HBM2e",
    "memory_bandwidth": "2.45 TB/s",
    "fp16_tflops": 729,
    "fp8_tflops": 1458,
    "interconnect": "24x 100G RoCEv2",
    "tpb_watt": 600,
    "released": "2022-10-01",
    "status": "Shipping (mature)",
    "pricing": {
      "retail_usd": "TBD",
      "cloud_per_hour": {}
    },
    "availability": "Available — prior-gen Habana accelerator, deployed at Supermicro and Intel DevCloud",
    "tags": [
      "training",
      "inference",
      "intel",
      "alternative",
      "legacy"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "google-tpu-trillium",
    "name": "Google TPU Trillium (v6e)",
    "manufacturer": "Google",
    "architecture": "Trillium",
    "memory": "32GB HBM3",
    "memory_bandwidth": "1.6 TB/s",
    "fp16_tflops": 918,
    "fp8_tflops": 1836,
    "interconnect": "ICI (3D Torus)",
    "tpb_watt": "TBD",
    "released": "2024-10-01",
    "status": "Operational (GCP only)",
    "pricing": {
      "cloud_per_hour": {
        "gcp_ct6e": 2.7
      }
    },
    "availability": "GCP only — available to selected customers; expandable pods",
    "tags": [
      "training",
      "inference",
      "tpu",
      "google",
      "alternative"
    ],
    "last_updated": "2026-06-23"
  },
  {
    "id": "google-tpu-v5p",
    "name": "Google TPU v5p",
    "manufacturer": "Google",
    "architecture": "TPU v5 (Pod)",
    "memory": "95GB HBM3",
    "memory_bandwidth": "2.77 TB/s",
    "fp16_tflops": 459,
    "fp8_tflops": 918,
    "interconnect": "ICI (3D Torus, 6K pod)",
    "tpb_watt": "TBD",
    "released": "2023-12-01",
    "status": "Operational (GCP only)",
    "pricing": {
      "cloud_per_hour": {
        "gcp_v5p": 4.2
      }
    },
    "availability": "GCP only — largest TPU pod configuration (6,144 chips); available to selected customers",
    "tags": [
      "training",
      "inference",
      "tpu",
      "google",
      "alternative",
      "pod"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "google-tpu-v5e",
    "name": "Google TPU v5e",
    "manufacturer": "Google",
    "architecture": "TPU v5 (Lite)",
    "memory": "16GB HBM3",
    "memory_bandwidth": "0.81 TB/s",
    "fp16_tflops": 197,
    "fp8_tflops": 393,
    "interconnect": "ICI (2D Torus)",
    "tpb_watt": "TBD",
    "released": "2023-08-01",
    "status": "Operational (GCP only)",
    "pricing": {
      "cloud_per_hour": {
        "gcp_v5e": 1.2
      }
    },
    "availability": "GCP only — cost-efficient inference TPU for large-scale serving",
    "tags": [
      "inference",
      "tpu",
      "google",
      "alternative",
      "cost-efficient"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "google-tpu-v4",
    "name": "Google TPU v4",
    "manufacturer": "Google",
    "architecture": "TPU v4",
    "memory": "32GB HBM3",
    "memory_bandwidth": "1.2 TB/s",
    "fp16_tflops": 275,
    "fp8_tflops": "N/A",
    "interconnect": "ICI (3D Torus, 4K pod)",
    "tpb_watt": "TBD",
    "released": "2021-06-01",
    "status": "Operational (GCP only)",
    "pricing": {
      "cloud_per_hour": {
        "gcp_v4": 3.22
      }
    },
    "availability": "GCP only — prior-gen, still widely deployed for training and inference",
    "tags": [
      "training",
      "inference",
      "tpu",
      "google",
      "alternative",
      "legacy"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "google-ironwood",
    "name": "Google Ironwood (TPU v7)",
    "manufacturer": "Google",
    "architecture": "Ironwood",
    "memory": "TBD (expected 64GB+ HBM3e)",
    "memory_bandwidth": "TBD (expected 3+ TB/s)",
    "fp16_tflops": "TBD",
    "fp8_tflops": "TBD",
    "interconnect": "ICI (next-gen, expected)",
    "tpb_watt": "TBD",
    "released": "2027-Q1 (expected)",
    "status": "Announced (not yet shipping)",
    "pricing": {
      "cloud_per_hour": {}
    },
    "availability": "Not yet available — Google's next-gen TPU after Trillium. Expected to power Gemini 3 era training.",
    "tags": [
      "training",
      "inference",
      "tpu",
      "google",
      "alternative",
      "upcoming"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "cerebras-cs3",
    "name": "Cerebras CS-3 (Wafer-Scale)",
    "manufacturer": "Cerebras Systems",
    "architecture": "WSE-3 (wafer-scale)",
    "memory": "44GB SRAM (on-die) + external",
    "memory_bandwidth": "21 PB/s (on-die)",
    "fp16_tflops": 125,
    "fp8_tflops": 250,
    "interconnect": "MemoryX + SwarmX",
    "tpb_watt": "TBD (system)",
    "released": "2024-03-01",
    "status": "Shipping (limited)",
    "pricing": {
      "retail_usd": "TBD (system-level)",
      "cloud_per_hour": {}
    },
    "availability": "Limited — wafer-scale engine with 4 trillion transistors. Deployed at G42, Argonne National Lab, and select AI labs.",
    "tags": [
      "training",
      "inference",
      "cerebras",
      "wafer-scale",
      "alternative",
      "innovative"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "groq-lpu",
    "name": "Groq LPU (Inference Engine)",
    "manufacturer": "Groq",
    "architecture": "LPU (Tensor Streaming)",
    "memory": "TBD (SRAM-based)",
    "memory_bandwidth": "80 TB/s (on-die SRAM)",
    "fp16_tflops": 750,
    "fp8_tflops": 1500,
    "interconnect": "GroqLink (multi-chip)",
    "tpb_watt": "300",
    "released": "2024-02-01",
    "status": "Shipping (limited)",
    "pricing": {
      "retail_usd": "TBD (system-level)",
      "cloud_per_hour": {}
    },
    "availability": "Limited — extreme inference throughput (tokens/sec) for LLMs. Deployed at GroqCloud for fast inference API.",
    "tags": [
      "inference",
      "groq",
      "alternative",
      "innovative",
      "high-throughput"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "sambanova-sn40l",
    "name": "SambaNova SN40L",
    "manufacturer": "SambaNova Systems",
    "architecture": "RDU (Reconfigurable Dataflow)",
    "memory": "64GB HBM3 + 1.5TB DDR5",
    "memory_bandwidth": "1.7 TB/s (HBM)",
    "fp16_tflops": 638,
    "fp8_tflops": 1275,
    "interconnect": "RDU-Connect",
    "tpb_watt": "TBD",
    "released": "2023-12-01",
    "status": "Shipping (limited)",
    "pricing": {
      "retail_usd": "TBD (system-level)",
      "cloud_per_hour": {}
    },
    "availability": "Limited — dataflow architecture optimized for large-model inference. Deployed at Argonne National Lab and LLNL.",
    "tags": [
      "training",
      "inference",
      "sambanova",
      "alternative",
      "reconfigurable"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "tenstorrent-grayskull",
    "name": "Tenstorrent Grayskull",
    "manufacturer": "Tenstorrent",
    "architecture": "Wormhole (RISC-V)",
    "memory": "32GB LPDDR4X",
    "memory_bandwidth": "0.137 TB/s",
    "fp16_tflops": 233,
    "fp8_tflops": 466,
    "interconnect": "WH-Connect (Ethernet-based)",
    "tpb_watt": "200",
    "released": "2024-06-01",
    "status": "Shipping (limited)",
    "pricing": {
      "retail_usd": "1500 (dev kit)",
      "cloud_per_hour": {}
    },
    "availability": "Limited — RISC-V based AI accelerator focused on cost-efficient training and inference. Developer kits available.",
    "tags": [
      "training",
      "inference",
      "tenstorrent",
      "alternative",
      "risc-v",
      "low-cost"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "huawei-ascend-910b",
    "name": "Huawei Ascend 910B",
    "manufacturer": "Huawei",
    "architecture": "Da Vinci (7nm)",
    "memory": "64GB HBM2e",
    "memory_bandwidth": "1.6 TB/s",
    "fp16_tflops": 320,
    "fp8_tflops": 640,
    "interconnect": "HCCS (300GB/s)",
    "tpb_watt": "400",
    "released": "2023-08-01",
    "status": "Shipping (China domestic only)",
    "pricing": {
      "retail_usd": "TBD (China only)",
      "cloud_per_hour": {}
    },
    "availability": "China domestic market only — Huawei's primary AI accelerator replacing NVIDIA GPUs under US export controls",
    "tags": [
      "training",
      "inference",
      "huawei",
      "china",
      "alternative",
      "export-controlled"
    ],
    "last_updated": "2026-06-24"
  },
  {
    "id": "huawei-ascend-910c",
    "name": "Huawei Ascend 910C",
    "manufacturer": "Huawei",
    "architecture": "Da Vinci (enhanced)",
    "memory": "80GB HBM3",
    "memory_bandwidth": "2.0 TB/s",
    "fp16_tflops": 400,
    "fp8_tflops": 800,
    "interconnect": "HCCS (next-gen)",
    "tpb_watt": "450",
    "released": "2025-12-01",
    "status": "Shipping (China domestic only)",
    "pricing": {
      "retail_usd": "TBD (China only)",
      "cloud_per_hour": {}
    },
    "availability": "China domestic market only — enhanced Ascend for frontier-scale training; reportedly used by DeepSeek and other Chinese AI labs",
    "tags": [
      "training",
      "inference",
      "huawei",
      "china",
      "alternative",
      "export-controlled"
    ],
    "last_updated": "2026-06-24"
  }
]