mirror of
https://github.com/saymrwulf/CertTransparencySearch.git
synced 2026-05-14 20:37:52 +00:00
Integrate CAA analysis into monograph
This commit is contained in:
parent
696041a5d1
commit
ca317e7ba9
4 changed files with 746 additions and 32 deletions
2
Makefile
2
Makefile
|
|
@ -4,6 +4,7 @@ DOMAINS ?= domains.local.txt
|
|||
FOCUS_SUBJECTS ?= focus_subjects.local.txt
|
||||
CACHE_TTL ?= 0
|
||||
DNS_CACHE_TTL ?= 86400
|
||||
CAA_CACHE_TTL ?= 86400
|
||||
MAX_CANDIDATES ?= 10000
|
||||
|
||||
.PHONY: bootstrap install init-config inventory purpose lineage consolidated monograph all
|
||||
|
|
@ -60,6 +61,7 @@ monograph:
|
|||
--focus-subjects-file $(FOCUS_SUBJECTS) \
|
||||
--cache-ttl-seconds $(CACHE_TTL) \
|
||||
--dns-cache-ttl-seconds $(DNS_CACHE_TTL) \
|
||||
--caa-cache-ttl-seconds $(CAA_CACHE_TTL) \
|
||||
--max-candidates-per-domain $(MAX_CANDIDATES) \
|
||||
--markdown-output output/corpus/monograph.md \
|
||||
--latex-output output/corpus/monograph.tex \
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ This project builds a publication-grade monograph from Certificate Transparency
|
|||
- it verifies locally that the certificates are real leaf certificates rather than CA certificates or precertificates
|
||||
- it assesses intended usage from EKU and KeyUsage
|
||||
- it scans the DNS names exposed by the SAN corpus
|
||||
- it evaluates the effective CAA policy for those DNS names to show where public CA issuance is governed, delegated, or unrestricted
|
||||
- it can analyse a second local-only Subject-CN cohort file against the wider estate
|
||||
- it produces one primary readable output set: a monograph in Markdown, LaTeX, and PDF
|
||||
|
||||
|
|
@ -32,7 +33,7 @@ None of those paths should be committed.
|
|||
|
||||
- `python3`: runs the scanners and report generators
|
||||
- `make`: gives you short repeatable commands instead of long manual command lines
|
||||
- `dig`: performs the live DNS scan
|
||||
- `dig`: performs the live DNS and CAA scans
|
||||
- `xelatex`: compiles the PDF reports
|
||||
|
||||
If `xelatex` is missing, the Markdown and LaTeX outputs can still be generated, but the PDF targets will fail.
|
||||
|
|
@ -162,18 +163,20 @@ The default `Makefile` values are:
|
|||
- `FOCUS_SUBJECTS=focus_subjects.local.txt`
|
||||
- `CACHE_TTL=0`
|
||||
- `DNS_CACHE_TTL=86400`
|
||||
- `CAA_CACHE_TTL=86400`
|
||||
- `MAX_CANDIDATES=10000`
|
||||
|
||||
This means:
|
||||
|
||||
- Certificate Transparency is refreshed live on every normal run.
|
||||
- DNS results are reused for up to one day unless you override the DNS cache TTL.
|
||||
- CAA results are reused for up to one day unless you override the CAA cache TTL.
|
||||
- The query cap is high enough for the current corpus and the scanner will refuse to run if the live raw match count exceeds the cap.
|
||||
|
||||
If you want to override values:
|
||||
|
||||
```bash
|
||||
make monograph CACHE_TTL=86400 DNS_CACHE_TTL=86400
|
||||
make monograph CACHE_TTL=86400 DNS_CACHE_TTL=86400 CAA_CACHE_TTL=86400
|
||||
```
|
||||
|
||||
Or:
|
||||
|
|
@ -250,6 +253,7 @@ This is only needed if you want the raw family inventory outside the monograph:
|
|||
--focus-subjects-file focus_subjects.local.txt \
|
||||
--cache-ttl-seconds 0 \
|
||||
--dns-cache-ttl-seconds 86400 \
|
||||
--caa-cache-ttl-seconds 86400 \
|
||||
--max-candidates-per-domain 10000 \
|
||||
--markdown-output output/corpus/monograph.md \
|
||||
--latex-output output/corpus/monograph.tex \
|
||||
|
|
@ -262,6 +266,7 @@ This is only needed if you want the raw family inventory outside the monograph:
|
|||
- `ct_usage_assessment.py`: EKU and KeyUsage assessment
|
||||
- `ct_lineage_report.py`: historical Subject CN, Subject DN, issuer, SAN, and issuance-burst analysis
|
||||
- `ct_dns_utils.py`: DNS scanning and provider-signature logic
|
||||
- `ct_caa_analysis.py`: CAA discovery, caching, and issuance-policy analysis
|
||||
- `ct_master_report.py`: shorter consolidated report
|
||||
- `ct_monograph_report.py`: publication-grade monograph with embedded appendices
|
||||
- `Makefile`: reproducible operator workflow
|
||||
|
|
|
|||
275
ct_caa_analysis.py
Normal file
275
ct_caa_analysis.py
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import ct_dns_utils
|
||||
import ct_scan
|
||||
|
||||
|
||||
@dataclass
class CaaObservation:
    """Outcome of one effective-CAA lookup for a single DNS name."""

    # DNS name the lookup was started for.
    name: str
    # Name in the label walk whose query returned CAA rows; None when nothing was found.
    effective_rr_owner: str | None
    # One of "exact", "alias_target", "parent", "parent_alias_target" or "none".
    source_kind: str
    # Last alias seen in the answering response, else the owner name; None when nothing was found.
    source_label: str | None
    # Alias names parsed from the response that carried the CAA rows.
    aliases_seen: list[str]
    # Parsed CAA records as (flag, tag, value) tuples.
    caa_rows: list[tuple[int, str, str]]
|
||||
|
||||
|
||||
@dataclass
class CaaNameRow:
    """Per-DNS-name report row combining the CAA observation with certificate coverage."""

    # Normalised DNS name taken from the SAN entries of the scanned certificates.
    name: str
    # Matching configured domain as returned by classify_zone, or "other".
    zone: str
    # The next four fields are copied from the CaaObservation for this name.
    source_kind: str
    effective_rr_owner: str | None
    source_label: str | None
    aliases_seen: list[str]
    # Sorted unique values of the "issue", "issuewild" and "iodef" CAA rows respectively.
    issue_values: list[str]
    issuewild_values: list[str]
    iodef_values: list[str]
    # CA families recognised in the "issue" rows (empty when none were recognised).
    allowed_ca_families: list[str]
    # Sorted issuer families of the certificates whose SAN entries include this name.
    current_covering_families: list[str]
    # Sorted unique Subject CNs of those certificates.
    current_covering_subject_cns: list[str]
    # Number of (subject CN, family) coverage entries recorded for this name.
    current_covering_cert_count: int
    # True when more than one issuer family covers the name.
    current_multi_family_overlap: bool
    # True when mismatch_families is non-empty.
    current_policy_mismatch: bool
    # Covering families absent from allowed_ca_families; always empty when no family is allowed.
    mismatch_families: list[str]
|
||||
|
||||
|
||||
@dataclass
class CaaAnalysis:
    """Aggregate CAA analysis over every DNS name found in the certificate corpus."""

    # Timestamp string produced by ct_scan.utc_iso at the end of build_analysis.
    generated_at_utc: str
    # Configured domains, sorted.
    configured_domains: list[str]
    # Number of rows (one per analysed DNS name).
    total_names: int
    # One CaaNameRow per analysed DNS name, in sorted name order.
    rows: list[CaaNameRow]
    # Row counts keyed by CaaNameRow.source_kind.
    source_kind_counts: Counter[str]
    # Row counts keyed by CaaNameRow.zone.
    zone_counts: Counter[str]
    # Sorted names of rows flagged current_multi_family_overlap.
    multi_family_overlap_names: list[str]
    # Sorted names of rows flagged current_policy_mismatch.
    policy_mismatch_names: list[str]
|
||||
|
||||
|
||||
def normalize_dns_name(value: str) -> str:
    """Normalise a SAN entry, dropping an optional case-insensitive ``DNS:`` prefix.

    Surrounding whitespace is stripped first; the remaining text is handed to
    ``ct_dns_utils.normalize_name``.
    """
    stripped = value.strip()
    has_prefix = stripped.upper().startswith("DNS:")
    bare_name = stripped[4:] if has_prefix else stripped
    return ct_dns_utils.normalize_name(bare_name)
|
||||
|
||||
|
||||
def issuer_family(names: set[str]) -> str:
    """Map a certificate's issuer names onto a coarse CA family label.

    The names are sorted, joined and lower-cased, then searched for marker
    substrings. Families are tested in a fixed order and the first hit wins;
    ``"Other"`` is returned when nothing matches.
    """
    haystack = " ".join(sorted(names)).lower()
    # Order matters: earlier families take precedence over later ones.
    family_markers = (
        ("Amazon", ("amazon",)),
        ("Google Trust Services", ("google trust services", "cn=we1")),
        ("Sectigo/COMODO", ("sectigo", "comodo")),
        (
            "DigiCert/QuoVadis",
            ("digicert", "quovadis", "thawte", "geotrust", "rapidssl", "symantec", "verisign"),
        ),
    )
    for family, markers in family_markers:
        if any(marker in haystack for marker in markers):
            return family
    return "Other"
|
||||
|
||||
|
||||
def classify_zone(name: str, configured_domains: list[str]) -> str:
    """Return the configured domain that ``name`` belongs to, or ``"other"``.

    Domains are tried longest first, so the most specific configured zone wins.
    A name matches when it equals the lower-cased domain or is a subdomain of it.
    """
    longest_first = sorted(configured_domains, key=len, reverse=True)
    for zone in (domain.lower() for domain in longest_first):
        if name == zone or name.endswith("." + zone):
            return zone
    return "other"
|
||||
|
||||
|
||||
def cache_path(cache_dir: Path, name: str) -> Path:
    """Return the cache file path for ``name`` inside ``cache_dir``.

    The file name comes from ``ct_dns_utils.cache_key`` applied to ``"caa-<name>"``.
    """
    file_name = ct_dns_utils.cache_key(f"caa-{name}")
    return cache_dir / file_name
|
||||
|
||||
|
||||
def serialize_observation(observation: CaaObservation) -> dict[str, Any]:
    """Convert an observation into a plain dict suitable for the JSON cache.

    Scalar and list fields are copied as-is; each CAA row tuple becomes a list.
    """
    copied_fields = ("name", "effective_rr_owner", "source_kind", "source_label", "aliases_seen")
    payload: dict[str, Any] = {field: getattr(observation, field) for field in copied_fields}
    payload["caa_rows"] = [list(row) for row in observation.caa_rows]
    return payload
|
||||
|
||||
|
||||
def deserialize_observation(payload: dict[str, Any]) -> CaaObservation:
    """Rebuild a ``CaaObservation`` from a cached dict.

    ``name`` and ``source_kind`` are required keys; the remaining keys fall
    back to ``None`` or an empty list. CAA rows are coerced back into
    ``(int, str, str)`` tuples.
    """
    name = payload["name"]
    owner = payload.get("effective_rr_owner")
    kind = payload["source_kind"]
    label = payload.get("source_label")
    aliases = list(payload.get("aliases_seen", []))
    rows: list[tuple[int, str, str]] = []
    for flag, tag, value in payload.get("caa_rows", []):
        rows.append((int(flag), str(tag), str(value)))
    return CaaObservation(
        name=name,
        effective_rr_owner=owner,
        source_kind=kind,
        source_label=label,
        aliases_seen=aliases,
        caa_rows=rows,
    )
|
||||
|
||||
|
||||
def parse_caa_response(lines: list[str]) -> tuple[list[tuple[int, str, str]], list[str]]:
    """Split short-form ``dig`` CAA output into CAA rows and alias names.

    A line of the form ``<flag> <tag> <value>`` with a numeric flag becomes a
    ``(flag, tag, value)`` row; tag and value are lower-cased and surrounding
    double quotes are removed from the value. Any other line ending in ``.``
    is treated as an alias target and normalised via
    ``ct_dns_utils.normalize_name``.

    Lines starting with ``;`` are skipped. They are dig diagnostics, and some
    of them (for example ``;; Truncated, retrying in TCP mode.``) end with a
    full stop and would otherwise be recorded as an alias.

    Returns:
        ``(rows, aliases)`` in the order the lines were seen.
    """
    rows: list[tuple[int, str, str]] = []
    aliases: list[str] = []
    for line in lines:
        if line.lstrip().startswith(";"):
            # Comment/diagnostic output, never a record.
            continue
        parts = line.split(maxsplit=2)
        if len(parts) == 3 and parts[0].isdigit():
            flag, tag, value = parts
            rows.append((int(flag), tag.lower(), value.strip().strip('"').lower()))
        elif line.endswith("."):
            aliases.append(ct_dns_utils.normalize_name(line))
    return rows, aliases
|
||||
|
||||
|
||||
def query_caa_lines(name: str) -> list[str]:
    """Run a short-form CAA query for ``name`` and return its non-blank, stripped lines."""
    raw_output = ct_dns_utils.run_dig(name, "CAA", short=True)
    stripped_lines = (line.strip() for line in raw_output.splitlines())
    return [line for line in stripped_lines if line]
|
||||
|
||||
|
||||
def relevant_caa_live(name: str) -> CaaObservation:
    """Find the effective CAA record set for ``name`` with live DNS queries.

    The name is lower-cased, its trailing dot removed, and then queried label
    by label from the full name up to its last label. The first candidate
    whose response contains CAA rows wins. ``source_kind`` records whether
    that was the name itself or a parent, and whether alias lines accompanied
    the rows. When no candidate yields rows, a ``"none"`` observation is
    returned.
    """
    labels = name.rstrip(".").lower().split(".")
    for depth, _label in enumerate(labels):
        owner = ".".join(labels[depth:])
        rows, aliases = parse_caa_response(query_caa_lines(owner))
        if not rows:
            continue
        via_alias = bool(aliases)
        if depth == 0:
            kind = "alias_target" if via_alias else "exact"
        else:
            kind = "parent_alias_target" if via_alias else "parent"
        return CaaObservation(
            name=name,
            effective_rr_owner=owner,
            source_kind=kind,
            source_label=aliases[-1] if via_alias else owner,
            aliases_seen=aliases,
            caa_rows=rows,
        )
    # No candidate in the walk returned CAA rows.
    return CaaObservation(
        name=name,
        effective_rr_owner=None,
        source_kind="none",
        source_label=None,
        aliases_seen=[],
        caa_rows=[],
    )
|
||||
|
||||
|
||||
def scan_name_cached(name: str, cache_dir: Path, ttl_seconds: int) -> CaaObservation:
    """Return the CAA observation for ``name``, using the JSON cache when possible.

    On a cache hit the ``cached_at`` key is dropped and the payload is
    deserialised. On a miss a live lookup is performed and its result stored.
    """
    key = cache_path(cache_dir, name).name
    payload = ct_dns_utils.load_json_cache(cache_dir, key, ttl_seconds)
    if payload is None:
        fresh = relevant_caa_live(name)
        ct_dns_utils.store_json_cache(cache_dir, key, serialize_observation(fresh))
        return fresh
    # "cached_at" is cache bookkeeping, not part of the observation.
    payload.pop("cached_at", None)
    return deserialize_observation(payload)
|
||||
|
||||
|
||||
def allowed_ca_families(caa_rows: list[tuple[int, str, str]]) -> list[str]:
    """Return the sorted CA families recognised in the ``issue`` rows.

    Only rows tagged ``issue`` are inspected. One trailing dot is removed from
    the value, and the value is then searched for known issuer-domain
    substrings. A single row may contribute several families.

    NOTE(review): an ``issue`` value naming no recognised CA (including ``;``)
    contributes nothing, so the result can be empty even though CAA rows exist.
    """
    family_tokens = {
        "Amazon": ("amazon.com", "amazontrust.com", "awstrust.com", "amazonaws.com", "aws.amazon.com"),
        "Sectigo/COMODO": ("sectigo.com", "comodoca.com", "comodo.com"),
        "DigiCert/QuoVadis": (
            "digicert.com",
            "digicert.ne.jp",
            "thawte.com",
            "geotrust.com",
            "rapidssl.com",
            "symantec.com",
            "quovadisglobal.com",
            "digitalcertvalidation.com",
        ),
        "Google Trust Services": ("pki.goog",),
        "Let's Encrypt": ("letsencrypt.org",),
        "Telia": ("telia.com", "telia.fi", "telia.se"),
    }
    found: set[str] = set()
    for _flag, tag, value in caa_rows:
        if tag != "issue":
            continue
        issuer = value.removesuffix(".")
        found.update(
            family
            for family, tokens in family_tokens.items()
            if any(token in issuer for token in tokens)
        )
    return sorted(found)
|
||||
|
||||
|
||||
def issue_values(caa_rows: list[tuple[int, str, str]], tag: str) -> list[str]:
    """Return the sorted, de-duplicated values of all CAA rows carrying ``tag``."""
    matching: set[str] = set()
    for _flag, row_tag, value in caa_rows:
        if row_tag == tag:
            matching.add(value)
    return sorted(matching)
|
||||
|
||||
|
||||
def build_analysis(
    hits: list[ct_scan.CertificateHit],
    configured_domains: list[str],
    cache_dir: Path,
    ttl_seconds: int,
) -> CaaAnalysis:
    """Build the CAA analysis for every DNS name found in the hits' SAN entries.

    For each distinct non-empty normalised SAN name, the effective CAA is
    looked up (through the cache) and compared with the issuer families of the
    certificates that list that name.

    NOTE(review): a name whose CAA rows map to no recognised family is treated
    like a name without restriction, so it never yields a mismatch.
    """
    # normalised SAN name -> (subject CN, issuer family) for each covering SAN entry
    coverage: dict[str, list[tuple[str, str]]] = defaultdict(list)
    for hit in hits:
        family = issuer_family(hit.issuer_names)
        subject_cn = normalize_dns_name(hit.subject_cn)
        for entry in hit.san_entries:
            coverage[normalize_dns_name(entry)].append((subject_cn, family))
    # Entries that normalise to an empty string are not analysed.
    names = sorted(dns_name for dns_name in coverage if dns_name)

    rows: list[CaaNameRow] = []
    for name in names:
        observation = scan_name_cached(name, cache_dir, ttl_seconds)
        caa_rows = observation.caa_rows
        permitted = allowed_ca_families(caa_rows)
        covering = coverage[name]
        live_families = sorted({family for _subject, family in covering})
        if permitted:
            mismatched = sorted(family for family in live_families if family not in permitted)
        else:
            mismatched = []
        row = CaaNameRow(
            name=name,
            zone=classify_zone(name, configured_domains),
            source_kind=observation.source_kind,
            effective_rr_owner=observation.effective_rr_owner,
            source_label=observation.source_label,
            aliases_seen=observation.aliases_seen,
            issue_values=issue_values(caa_rows, "issue"),
            issuewild_values=issue_values(caa_rows, "issuewild"),
            iodef_values=issue_values(caa_rows, "iodef"),
            allowed_ca_families=permitted,
            current_covering_families=live_families,
            current_covering_subject_cns=sorted({subject for subject, _family in covering}),
            current_covering_cert_count=len(covering),
            current_multi_family_overlap=len(live_families) > 1,
            current_policy_mismatch=bool(mismatched),
            mismatch_families=mismatched,
        )
        rows.append(row)

    overlap_names = sorted(row.name for row in rows if row.current_multi_family_overlap)
    mismatch_names = sorted(row.name for row in rows if row.current_policy_mismatch)
    return CaaAnalysis(
        generated_at_utc=ct_scan.utc_iso(datetime.now(UTC)),
        configured_domains=sorted(configured_domains),
        total_names=len(rows),
        rows=rows,
        source_kind_counts=Counter(row.source_kind for row in rows),
        zone_counts=Counter(row.zone for row in rows),
        multi_family_overlap_names=overlap_names,
        policy_mismatch_names=mismatch_names,
    )
|
||||
|
||||
|
||||
def rows_for_zone(analysis: CaaAnalysis, zone: str) -> list[CaaNameRow]:
    """Return the analysis rows whose ``zone`` equals ``zone``, in original order."""
    return list(filter(lambda row: row.zone == zone, analysis.rows))
|
||||
|
||||
|
||||
def policy_counter(rows: list[CaaNameRow]) -> Counter[tuple[str, ...]]:
    """Count rows per allowed-CA-family combination.

    The key is the row's ``allowed_ca_families`` as a tuple; rows with no
    allowed family are counted under ``("UNRESTRICTED",)``.
    """
    return Counter(
        tuple(row.allowed_ca_families) if row.allowed_ca_families else ("UNRESTRICTED",)
        for row in rows
    )
|
||||
|
||||
|
||||
def serialize_analysis(analysis: CaaAnalysis) -> dict[str, Any]:
    """Convert the analysis into plain dicts and lists.

    Rows are expanded with ``dataclasses.asdict`` and the two counters are
    converted to ordinary dicts; every other field is passed through unchanged.
    """
    return dict(
        generated_at_utc=analysis.generated_at_utc,
        configured_domains=analysis.configured_domains,
        total_names=analysis.total_names,
        rows=list(map(asdict, analysis.rows)),
        source_kind_counts=dict(analysis.source_kind_counts),
        zone_counts=dict(analysis.zone_counts),
        multi_family_overlap_names=analysis.multi_family_overlap_names,
        policy_mismatch_names=analysis.policy_mismatch_names,
    )
|
||||
|
|
@ -6,6 +6,7 @@ import argparse
|
|||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
import ct_caa_analysis
|
||||
import ct_dns_utils
|
||||
import ct_focus_subjects
|
||||
import ct_lineage_report
|
||||
|
|
@ -20,10 +21,12 @@ def parse_args() -> argparse.Namespace:
|
|||
parser.add_argument("--domains-file", type=Path, default=Path("domains.local.txt"))
|
||||
parser.add_argument("--cache-dir", type=Path, default=Path(".cache/ct-search"))
|
||||
parser.add_argument("--dns-cache-dir", type=Path, default=Path(".cache/dns-scan"))
|
||||
parser.add_argument("--caa-cache-dir", type=Path, default=Path(".cache/caa-scan"))
|
||||
parser.add_argument("--history-cache-dir", type=Path, default=Path(".cache/ct-history-v2"))
|
||||
parser.add_argument("--focus-subjects-file", type=Path, default=Path("focus_subjects.local.txt"))
|
||||
parser.add_argument("--cache-ttl-seconds", type=int, default=0)
|
||||
parser.add_argument("--dns-cache-ttl-seconds", type=int, default=86400)
|
||||
parser.add_argument("--caa-cache-ttl-seconds", type=int, default=86400)
|
||||
parser.add_argument("--max-candidates-per-domain", type=int, default=10000)
|
||||
parser.add_argument("--retries", type=int, default=3)
|
||||
parser.add_argument("--markdown-output", type=Path, default=Path("output/corpus/monograph.md"))
|
||||
|
|
@ -239,6 +242,130 @@ def overlap_signal(details: str) -> str:
|
|||
return truncate_text("; ".join(parts) if parts else details, 108)
|
||||
|
||||
|
||||
def caa_source_label(source_kind: str) -> str:
    """Translate a CAA ``source_kind`` code into its report label.

    Unknown codes are returned unchanged.
    """
    labels = {
        "exact": "Exact-name CAA",
        "alias_target": "Alias-target CAA",
        "parent": "Inherited parent CAA",
        "parent_alias_target": "Inherited parent CAA reached through alias following",
        "none": "No CAA found",
    }
    if source_kind in labels:
        return labels[source_kind]
    return source_kind
|
||||
|
||||
|
||||
def caa_policy_label(families: tuple[str, ...]) -> str:
    """Return the short report label for an allowed-CA-family combination.

    Exact combinations are checked first, in a fixed order. Any other
    combination containing Let's Encrypt or Telia is labelled as
    vendor-delegated, and everything else as a mixed named policy.
    """
    exact_labels = (
        (("UNRESTRICTED",), "No published CAA restriction"),
        (("Amazon",), "Amazon-only issuance policy"),
        (("DigiCert/QuoVadis", "Sectigo/COMODO"), "Corporate broad policy"),
        (("Amazon", "DigiCert/QuoVadis", "Sectigo/COMODO"), "Mixed corporate-plus-Amazon policy"),
        (("Google Trust Services", "Sectigo/COMODO"), "Google plus Sectigo policy"),
    )
    for combination, label in exact_labels:
        if families == combination:
            return label
    vendor_delegated = "Let's Encrypt" in families or "Telia" in families
    return "Vendor-delegated broad policy" if vendor_delegated else "Mixed named policy"
|
||||
|
||||
|
||||
def caa_policy_explanation(families: tuple[str, ...]) -> str:
    """Return the plain-language explanation for an allowed-CA-family combination.

    Exact combinations are checked first, in a fixed order, followed by the
    Let's Encrypt / Telia check; a generic sentence covers everything else.
    """
    exact_explanations = (
        (
            ("UNRESTRICTED",),
            "No CAA restriction is published, so WebPKI issuance is not limited by DNS policy.",
        ),
        (
            ("Amazon",),
            "Only Amazon Trust Services identifiers are authorized by DNS policy.",
        ),
        (
            ("DigiCert/QuoVadis", "Sectigo/COMODO"),
            "The name inherits the broad corporate policy that permits the main non-Amazon public CA families seen in this estate.",
        ),
        (
            ("Amazon", "DigiCert/QuoVadis", "Sectigo/COMODO"),
            "The name permits both the broad corporate CA set and Amazon Trust Services.",
        ),
        (
            ("Google Trust Services", "Sectigo/COMODO"),
            "This is a narrow exception that permits Google Trust Services alongside the Sectigo lineage.",
        ),
    )
    for combination, explanation in exact_explanations:
        if families == combination:
            return explanation
    if "Let's Encrypt" in families or "Telia" in families:
        return "The allowed CA set is wider and looks delegated to a specialist external platform or vendor."
    return "The DNS policy allows a mixed set of public CA families."
|
||||
|
||||
|
||||
def service_anchor_label(name: str, zone: str) -> str:
    """Return the label directly left of ``zone`` in ``name``.

    This label is used to group names by service family. For example,
    ``api.shop.example.com`` in zone ``example.com`` yields ``shop``.

    Args:
        name: DNS name to label.
        zone: Zone the name was classified into, or ``"other"``.

    Returns:
        ``name`` itself when the zone is ``"other"``, ``zone`` when the name
        is the zone apex or has no usable label left of the zone, and
        otherwise the label immediately preceding the zone suffix.
    """
    if zone == "other":
        return name
    if name == zone:
        return zone
    # Drop the ".<zone>" suffix; what remains are the labels left of the zone.
    relative = name[: -(len(zone) + 1)]
    anchor = relative.rsplit(".", 1)[-1]
    # str.split never returns an empty list, so emptiness has to be checked
    # on the label itself for the zone fallback to take effect.
    return anchor or zone
|
||||
|
||||
|
||||
def caa_zone_policy_rows(
    analysis: ct_caa_analysis.CaaAnalysis,
    zone: str,
) -> list[list[str]]:
    """Build the policy-regime table rows for one zone.

    Each row holds the policy label, the number of names under that policy,
    and its plain-language explanation, ordered from most to least common.
    """
    zone_rows = ct_caa_analysis.rows_for_zone(analysis, zone)
    table: list[list[str]] = []
    for policy, count in ct_caa_analysis.policy_counter(zone_rows).most_common():
        table.append([caa_policy_label(policy), str(count), caa_policy_explanation(policy)])
    return table
|
||||
|
||||
|
||||
def caa_source_rows(analysis: ct_caa_analysis.CaaAnalysis) -> list[list[str]]:
    """Build the CAA-discovery table: label, name count, and meaning per source kind.

    Rows are ordered from the most to the least common source kind. Unknown
    kinds get a generic meaning.
    """
    meanings = {
        "exact": "The queried DNS name itself published the effective CAA.",
        "alias_target": "The queried DNS name resolved through an alias and the effective CAA came from what that alias chain exposed.",
        "parent": "The leaf name had no CAA, so issuance policy was inherited from a parent DNS node.",
        "parent_alias_target": "The leaf name inherited from a parent DNS node, and that parent policy was itself exposed through an alias response.",
        "none": "No effective CAA was found at the name or its parents.",
    }
    table: list[list[str]] = []
    for kind, count in analysis.source_kind_counts.most_common():
        table.append([caa_source_label(kind), str(count), meanings.get(kind, "CAA discovery result.")])
    return table
|
||||
|
||||
|
||||
def top_caa_overlap_rows(analysis: ct_caa_analysis.CaaAnalysis, limit: int = 15) -> list[list[str]]:
    """Build table rows for names currently covered by more than one CA family.

    Rows are sorted by zone, service anchor label and name, then cut to
    ``limit`` entries. The Subject CN column is truncated to 72 characters.
    """

    def sort_key(row: ct_caa_analysis.CaaNameRow) -> tuple[str, str, str]:
        return (row.zone, service_anchor_label(row.name, row.zone), row.name)

    overlapping = sorted(
        (row for row in analysis.rows if row.current_multi_family_overlap),
        key=sort_key,
    )
    table: list[list[str]] = []
    for row in overlapping[:limit]:
        families = ", ".join(row.current_covering_families)
        subjects = truncate_text(", ".join(row.current_covering_subject_cns), 72)
        table.append([row.name, row.zone, families, subjects])
    return table
|
||||
|
||||
|
||||
def top_caa_mismatch_rows(analysis: ct_caa_analysis.CaaAnalysis, limit: int = 15) -> list[list[str]]:
    """Build table rows for names whose live CA family is not allowed by current CAA.

    Rows are sorted by zone, service anchor label and name, then cut to
    ``limit`` entries.
    """

    def sort_key(row: ct_caa_analysis.CaaNameRow) -> tuple[str, str, str]:
        return (row.zone, service_anchor_label(row.name, row.zone), row.name)

    mismatched = sorted(
        (row for row in analysis.rows if row.current_policy_mismatch),
        key=sort_key,
    )
    table: list[list[str]] = []
    for row in mismatched[:limit]:
        live_families = ", ".join(row.current_covering_families)
        allowed = ", ".join(row.allowed_ca_families) or "UNRESTRICTED"
        table.append([row.name, row.zone, live_families, allowed, caa_source_label(row.source_kind)])
    return table
|
||||
|
||||
|
||||
def caa_concentration_text(analysis: ct_caa_analysis.CaaAnalysis, zone: str) -> str:
    """Summarise where CAA friction concentrates inside one zone.

    Counts the zone's names that show a policy mismatch or a multi-family
    overlap, grouped by service anchor label. Returns the six most common
    groups as ``"label (count)"`` items joined by commas, or ``"none"`` when
    no name qualifies.
    """
    flagged = [
        row
        for row in ct_caa_analysis.rows_for_zone(analysis, zone)
        if row.current_policy_mismatch or row.current_multi_family_overlap
    ]
    if not flagged:
        return "none"
    anchor_counts = Counter(service_anchor_label(row.name, zone) for row in flagged)
    top_groups = anchor_counts.most_common(6)
    return ", ".join(f"{label} ({count})" for label, count in top_groups)
|
||||
|
||||
|
||||
def focus_comparison_rows(focus_analysis: ct_focus_subjects.FocusCohortAnalysis) -> list[list[str]]:
|
||||
return [
|
||||
[
|
||||
|
|
@ -447,6 +574,7 @@ def render_markdown(
|
|||
args: argparse.Namespace,
|
||||
report: dict[str, object],
|
||||
assessment: ct_lineage_report.HistoricalAssessment,
|
||||
caa_analysis: ct_caa_analysis.CaaAnalysis,
|
||||
focus_analysis: ct_focus_subjects.FocusCohortAnalysis | None,
|
||||
) -> None:
|
||||
args.markdown_output.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -540,9 +668,17 @@ def render_markdown(
|
|||
focus_bucket_summary = focus_bucket_summary_rows(focus_analysis) if focus_analysis else []
|
||||
focus_representatives = focus_representative_rows(focus_analysis) if focus_analysis else []
|
||||
has_focus = focus_analysis is not None
|
||||
synthesis_chapter = 8 if has_focus else 7
|
||||
limits_chapter = 9 if has_focus else 8
|
||||
detailed_inventory_appendix = "D" if has_focus else "C"
|
||||
caa_zone_rows = {
|
||||
zone: caa_zone_policy_rows(caa_analysis, zone)
|
||||
for zone in caa_analysis.configured_domains
|
||||
}
|
||||
primary_zone = report["domains"][0] if report["domains"] else "configured primary zone"
|
||||
secondary_zone = report["domains"][1] if len(report["domains"]) > 1 else None
|
||||
synthesis_chapter = 9 if has_focus else 8
|
||||
limits_chapter = 10 if has_focus else 9
|
||||
caa_appendix = "C"
|
||||
focus_appendix = "D" if has_focus else None
|
||||
detailed_inventory_appendix = "E" if has_focus else "D"
|
||||
lines: list[str] = []
|
||||
lines.append("# CT and DNS Monograph")
|
||||
lines.append("")
|
||||
|
|
@ -558,7 +694,8 @@ def render_markdown(
|
|||
f"- **{purpose_summary.category_counts.get('tls_server_only', 0)}** certificates are ordinary public TLS server certificates, while **{purpose_summary.category_counts.get('tls_server_and_client', 0)}** come from templates that also permit client-certificate use.",
|
||||
f"- **{historical_count}** historical leaf certificates show how these names evolved over time, including expired renewal history.",
|
||||
f"- **{len(report['unique_dns_names'])}** unique DNS SAN names were scanned live.",
|
||||
"- The estate is best understood as several layers laid on top of one another: brand naming, service naming, platform naming, delivery-stack naming, and migration residue.",
|
||||
f"- **{caa_analysis.total_names}** DNS names were also assessed for effective CAA policy, revealing where issuance is centrally governed, delegated, or left unrestricted.",
|
||||
"- The estate is best understood as several layers laid on top of one another: brand naming, service naming, platform naming, delivery-stack naming, issuance-policy control, and migration residue.",
|
||||
]
|
||||
)
|
||||
lines.append("")
|
||||
|
|
@ -570,8 +707,9 @@ def render_markdown(
|
|||
"- Read Chapters 2 and 3 if you want the current certificate-side story: issuers, trust, and purpose.",
|
||||
"- Read Chapter 4 if you want the historical lifecycle view and the red flags split into current versus fixed-in-the-past.",
|
||||
"- Read Chapters 5 and 6 if you want the naming and DNS story.",
|
||||
"- Read Chapter 7 if you want the issuance-policy view: which public CAs are authorized by DNS and where that control is absent, inherited, or delegated.",
|
||||
*(
|
||||
["- Read Chapter 7 if you want the focused Subject-CN cohort analysis and why that subset behaves differently from the wider estate."]
|
||||
["- Read Chapter 8 if you want the focused Subject-CN cohort analysis and why that subset behaves differently from the wider estate."]
|
||||
if has_focus
|
||||
else []
|
||||
),
|
||||
|
|
@ -886,8 +1024,83 @@ def render_markdown(
|
|||
lines.append("")
|
||||
lines.append("The glossary terms above are the building blocks used in the DNS-outcome table. This is also why the management summary mentions Adobe Campaign, CloudFront, Apigee, and Pega at all: not because brand names are the point, but because those names reveal what kind of public delivery role a hostname is landing on. CloudFront suggests a distribution edge, Apigee suggests managed API exposure, Adobe Campaign suggests a marketing or communications front, and a load balancer suggests traffic distribution to backend services.")
|
||||
lines.append("")
|
||||
lines.append("The next chapter keeps the same names in view but asks a different question: not where the names land, but which public CA families DNS currently authorizes to issue for them.")
|
||||
lines.append("")
|
||||
lines.append("## Chapter 7: DNS Issuance Policy Control (CAA)")
|
||||
lines.append("")
|
||||
lines.append("**Management Summary**")
|
||||
lines.append("")
|
||||
for zone in caa_analysis.configured_domains:
|
||||
zone_rows = ct_caa_analysis.rows_for_zone(caa_analysis, zone)
|
||||
unrestricted_count = sum(1 for row in zone_rows if not row.allowed_ca_families)
|
||||
mismatch_count = sum(1 for row in zone_rows if row.current_policy_mismatch)
|
||||
overlap_count = sum(1 for row in zone_rows if row.current_multi_family_overlap)
|
||||
dominant_policy = ct_caa_analysis.policy_counter(zone_rows).most_common(1)
|
||||
dominant_label = caa_policy_label(dominant_policy[0][0]) if dominant_policy else "none"
|
||||
lines.append(
|
||||
f"- `{zone}`: {len(zone_rows)} names in scope; dominant policy is {dominant_label}; unrestricted names={unrestricted_count}; current policy-mismatch names={mismatch_count}; current multi-family overlap names={overlap_count}."
|
||||
)
|
||||
lines.extend(
|
||||
[
|
||||
f"- Effective CAA discovery paths across all names: {', '.join(f'{caa_source_label(kind)}={count}' for kind, count in caa_analysis.source_kind_counts.most_common())}.",
|
||||
f"- Current names simultaneously covered by more than one live CA family: {len(caa_analysis.multi_family_overlap_names)}.",
|
||||
f"- Current names whose live certificate family does not match today's published CAA policy: {len(caa_analysis.policy_mismatch_names)}.",
|
||||
]
|
||||
)
|
||||
lines.append("")
|
||||
lines.append("CAA is the DNS control layer for public certificate issuance. It does not validate a certificate after issuance; instead, it tells a public CA which CA families are authorized to issue for a DNS name if any restriction is published at all. If no CAA is published, WebPKI issuance is unrestricted from the DNS-policy point of view.")
|
||||
lines.append("")
|
||||
lines.append("This chapter adds the missing control dimension to the earlier chapters. The certificate chapter showed who actually issued. The DNS chapter showed where the names land. The CAA chapter shows which issuers are supposed to be allowed by DNS policy.")
|
||||
lines.append("")
|
||||
lines.append("CAA is checked per DNS name requested in the certificate, not per Subject DN and not per organisational story. A Subject CN can therefore shift between different Subject DN values without creating a CAA clash, because CAA ignores organisation fields and looks only at the DNS names being certified.")
|
||||
lines.append("")
|
||||
lines.append("### How To Read The CAA Results")
|
||||
lines.append("")
|
||||
lines.extend(md_table(["CAA Discovery Result", "Names", "Meaning"], caa_source_rows(caa_analysis)))
|
||||
lines.append("")
|
||||
lines.append("The key distinction is between ordinary parent inheritance and alias-target-derived policy. Parent inheritance means the leaf name simply relies on a policy published higher in its own DNS tree. Alias-target-derived policy means the effective CAA surfaced through an alias response. In this corpus, that often marks a managed rail or specialist external platform rather than a plain brand-front hostname.")
|
||||
lines.append("")
|
||||
lines.append("### Policy Regimes By Configured Zone")
|
||||
lines.append("")
|
||||
for zone in caa_analysis.configured_domains:
|
||||
lines.append(f"#### `{zone}`")
|
||||
lines.append("")
|
||||
lines.extend(md_table(["Policy Regime", "Names", "Plain-Language Meaning"], caa_zone_rows[zone]))
|
||||
lines.append("")
|
||||
if secondary_zone:
|
||||
lines.append(f"The contrast between `{primary_zone}` and `{secondary_zone}` is one of the strongest PKI-governance findings in the corpus. `{primary_zone}` is policy-layered and governed, while `{secondary_zone}` is currently CAA-empty in the scanned name set. That does not make `{secondary_zone}` invalid, but it does mean DNS is not constraining public CA choice there.")
|
||||
lines.append("")
|
||||
lines.append("### What The CAA Layer Does To The Earlier Thesis")
|
||||
lines.append("")
|
||||
lines.extend(
|
||||
[
|
||||
"- The CAA layer strengthens the earlier certificate-and-DNS thesis rather than overturning it. The same service families that already looked like shared managed rails from naming and DNS often sit under narrower issuance policy as well.",
|
||||
f"- In `{primary_zone}`, the current CAA friction is concentrated rather than diffuse: {caa_concentration_text(caa_analysis, primary_zone)}.",
|
||||
"- Broad corporate default policy remains visible on many ordinary brand-facing names. That supports the earlier reading that not every public hostname was moved onto one tightly managed delivery rail.",
|
||||
"- Vendor-style exceptions still exist. Where a name resolves through a specialist external platform and the allowed CA set widens or changes shape, the policy layer supports the earlier vendor-delegation reading rather than contradicting it.",
|
||||
]
|
||||
)
|
||||
lines.append("")
|
||||
lines.append("### Current Multi-Family Overlap")
|
||||
lines.append("")
|
||||
if caa_analysis.multi_family_overlap_names:
|
||||
lines.extend(md_table(["DNS Name", "Zone", "Live CA Families", "Covering Subject CNs"], top_caa_overlap_rows(caa_analysis)))
|
||||
else:
|
||||
lines.append("No current multi-family overlap names were found.")
|
||||
lines.append("")
|
||||
lines.append("These overlap names are operationally important. They show where the same public DNS name is currently covered by more than one live CA family at once. In this corpus, that behavior clusters tightly in a few service families rather than being spread randomly across the estate.")
|
||||
lines.append("")
|
||||
lines.append("### Current Policy Mismatch")
|
||||
lines.append("")
|
||||
if caa_analysis.policy_mismatch_names:
|
||||
lines.extend(md_table(["DNS Name", "Zone", "Live CA Families", "CAA-Allowed Families", "CAA Discovery Result"], top_caa_mismatch_rows(caa_analysis)))
|
||||
else:
|
||||
lines.append("No current policy-mismatch names were found.")
|
||||
lines.append("")
|
||||
lines.append("A current policy mismatch does not automatically prove CA misissuance. CAA only proves what DNS authorizes now. Certificates can remain valid after the DNS-side policy has changed, so the right reading here is current policy lag or migration residue unless the historical issuance-time DNS can also be shown.")
|
||||
lines.append("")
|
||||
if focus_analysis:
|
||||
lines.append("## Chapter 7: Focused Subject-CN Cohort")
|
||||
lines.append("## Chapter 8: Focused Subject-CN Cohort")
|
||||
lines.append("")
|
||||
lines.append("**Management Summary**")
|
||||
lines.append("")
|
||||
|
|
@ -965,16 +1178,16 @@ def render_markdown(
|
|||
lines.append("")
|
||||
lines.extend(
|
||||
[
|
||||
"- The certificate layer and the DNS layer are not two separate stories. They are two views of the same operating estate.",
|
||||
"- The certificate, DNS, and CAA layers are not three separate stories. They are three views of the same operating estate.",
|
||||
"- Clean public brand names usually sit closest to the customer surface.",
|
||||
"- Dense SAN sets, numbered families, and multi-zone certificates usually expose the underlying shared service rails and platform layer.",
|
||||
"- The overall shape is more consistent with a federated operating model than with random hostname sprawl.",
|
||||
"- Dense SAN sets, numbered families, multi-zone certificates, and narrower CAA policy usually expose the underlying shared service rails and platform layer.",
|
||||
"- The overall shape is more consistent with a federated operating model with uneven governance maturity than with random hostname sprawl.",
|
||||
]
|
||||
)
|
||||
lines.append("")
|
||||
lines.append("The common ground is operational reality. A branded proposition wants recognisable names. A service team wants a stable endpoint namespace. A platform team wants shared rails and repeatable delivery machinery. A hosting team wants routable front doors that can land on cloud distribution, gateways, or workflow platforms. The certificates and the DNS tell the same story from different angles.")
|
||||
lines.append("The common ground is operational reality. A branded proposition wants recognisable names. A service team wants a stable endpoint namespace. A platform team wants shared rails and repeatable delivery machinery. A hosting team wants routable front doors that can land on cloud distribution, gateways, or workflow platforms. A security or PKI function wants some names tightly governed and other names left broad or delegated. Certificates, DNS, and CAA tell the same estate story from different angles.")
|
||||
lines.append("")
|
||||
lines.append("This is why the estate can look both tidy and messy at once. It is tidy within each layer, but messy across layers because the layers are solving different problems.")
|
||||
lines.append("This is why the estate can look both tidy and messy at once. It is tidy within each layer, but messy across layers because the layers are solving different problems. The new CAA evidence sharpens that point rather than contradicting it: the managed rail families are not only named and hosted differently, they are often policy-controlled differently as well. The biggest qualification is that governance is uneven. The primary configured zone shows layered issuance control, while another configured zone remains CAA-empty. That is not random chaos, but it is also not uniform control maturity.")
|
||||
lines.append("")
|
||||
lines.append(f"## Chapter {limits_chapter}: Limits, Confidence, and Noise")
|
||||
lines.append("")
|
||||
|
|
@ -982,14 +1195,15 @@ def render_markdown(
|
|||
lines.append("")
|
||||
lines.extend(
|
||||
[
|
||||
"- High-confidence claims are the ones tied directly to observable certificate fields, DNS answers, and trust records.",
|
||||
"- High-confidence claims are the ones tied directly to observable certificate fields, DNS answers, trust records, and current CAA policy.",
|
||||
"- Medium-confidence claims are organisational readings drawn from repeated technical patterns.",
|
||||
"- Lower-confidence claims are exact expansions of abbreviations or exact internal ownership boundaries.",
|
||||
"- Some DNS names do not resolve publicly today; that does not invalidate the certificate-side evidence because certificate and DNS timelines are not identical.",
|
||||
"- A current CAA mismatch does not by itself prove historical CA non-compliance, because DNS policy may have changed after issuance.",
|
||||
]
|
||||
)
|
||||
lines.append("")
|
||||
lines.append("A useful way to read the corpus is to separate signal from noise. Repeated naming schemas are signal. Repeated DNS outcomes are signal. Which public CA family keeps issuing a name is signal. Simple `www` presence or absence is weak evidence either way unless it coincides with stronger differences such as distinct DNS routing, distinct SAN composition, or a distinct certificate renewal history.")
|
||||
lines.append("A useful way to read the corpus is to separate signal from noise. Repeated naming schemas are signal. Repeated DNS outcomes are signal. Which public CA family keeps issuing a name is signal. Where CAA is broad, narrow, delegated, or absent is signal. Simple `www` presence or absence is weak evidence either way unless it coincides with stronger differences such as distinct DNS routing, distinct SAN composition, a distinct certificate renewal history, or a distinct issuance-policy shape.")
|
||||
lines.append("")
|
||||
lines.append("## Appendix A: Full Family Catalogue")
|
||||
lines.append("")
|
||||
|
|
@ -1234,15 +1448,44 @@ def render_markdown(
|
|||
else:
|
||||
lines.append("No step weeks met the threshold.")
|
||||
lines.append("")
|
||||
if focus_analysis:
|
||||
lines.append("## Appendix C: Focused Subject-CN Detail")
|
||||
lines.append(f"## Appendix {caa_appendix}: CAA Policy Detail")
|
||||
lines.append("")
|
||||
lines.append("This appendix keeps the issuance-policy evidence inside the monograph. It answers a narrower question than the DNS appendix: not where a name lands, but which public CA families DNS currently authorizes to issue for that name.")
|
||||
lines.append("")
|
||||
lines.append("### C.1 CAA Discovery Paths")
|
||||
lines.append("")
|
||||
lines.extend(md_table(["CAA Discovery Result", "Names", "Meaning"], caa_source_rows(caa_analysis)))
|
||||
lines.append("")
|
||||
lines.append("### C.2 Policy Regimes By Configured Zone")
|
||||
lines.append("")
|
||||
for zone in caa_analysis.configured_domains:
|
||||
lines.append(f"#### `{zone}`")
|
||||
lines.append("")
|
||||
lines.append("This appendix keeps the complete focused-cohort table inside the monograph, but it now follows the three-bucket taxonomy from Chapter 7. That makes it easier to read the cohort as a set of related naming traditions instead of as one flat mixed list.")
|
||||
lines.extend(md_table(["Policy Regime", "Names", "Plain-Language Meaning"], caa_zone_rows[zone]))
|
||||
lines.append("")
|
||||
lines.append("### C.3 Current Multi-Family Overlap")
|
||||
lines.append("")
|
||||
if caa_analysis.multi_family_overlap_names:
|
||||
lines.extend(md_table(["DNS Name", "Zone", "Live CA Families", "Covering Subject CNs"], top_caa_overlap_rows(caa_analysis, 40)))
|
||||
else:
|
||||
lines.append("No current multi-family overlap names were found.")
|
||||
lines.append("")
|
||||
lines.append("### C.4 Current Policy Mismatch")
|
||||
lines.append("")
|
||||
if caa_analysis.policy_mismatch_names:
|
||||
lines.extend(md_table(["DNS Name", "Zone", "Live CA Families", "CAA-Allowed Families", "CAA Discovery Result"], top_caa_mismatch_rows(caa_analysis, 40)))
|
||||
else:
|
||||
lines.append("No current policy-mismatch names were found.")
|
||||
lines.append("")
|
||||
if focus_analysis:
|
||||
lines.append(f"## Appendix {focus_appendix}: Focused Subject-CN Detail")
|
||||
lines.append("")
|
||||
lines.append("This appendix keeps the complete focused-cohort table inside the monograph, but it now follows the three-bucket taxonomy from Chapter 8. That makes it easier to read the cohort as a set of related naming traditions instead of as one flat mixed list.")
|
||||
lines.append("")
|
||||
appendix_buckets = [
|
||||
("direct_front_door", "### C.1 Front-Door Direct Names"),
|
||||
("platform_matrix_anchor", "### C.2 Platform-Anchor Matrix Names"),
|
||||
("ambiguous_legacy", "### C.3 Ambiguous Or Legacy Residue"),
|
||||
("direct_front_door", "### D.1 Front-Door Direct Names"),
|
||||
("platform_matrix_anchor", "### D.2 Platform-Anchor Matrix Names"),
|
||||
("ambiguous_legacy", "### D.3 Ambiguous Or Legacy Residue"),
|
||||
]
|
||||
for bucket, heading in appendix_buckets:
|
||||
rows = focus_appendix_rows(focus_analysis, bucket)
|
||||
|
|
@ -1284,6 +1527,7 @@ def render_latex(
|
|||
args: argparse.Namespace,
|
||||
report: dict[str, object],
|
||||
assessment: ct_lineage_report.HistoricalAssessment,
|
||||
caa_analysis: ct_caa_analysis.CaaAnalysis,
|
||||
focus_analysis: ct_focus_subjects.FocusCohortAnalysis | None,
|
||||
) -> None:
|
||||
args.latex_output.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -1332,6 +1576,12 @@ def render_latex(
|
|||
focus_bucket_summary = focus_bucket_summary_rows(focus_analysis) if focus_analysis else []
|
||||
focus_representatives = focus_representative_rows(focus_analysis) if focus_analysis else []
|
||||
has_focus = focus_analysis is not None
|
||||
caa_zone_rows = {
|
||||
zone: caa_zone_policy_rows(caa_analysis, zone)
|
||||
for zone in caa_analysis.configured_domains
|
||||
}
|
||||
primary_zone = report["domains"][0] if report["domains"] else "configured primary zone"
|
||||
secondary_zone = report["domains"][1] if len(report["domains"]) > 1 else None
|
||||
appendix_pdf_path = args.appendix_pdf_output.resolve().as_posix()
|
||||
lines: list[str] = [
|
||||
r"\documentclass[11pt]{article}",
|
||||
|
|
@ -1413,7 +1663,8 @@ def render_latex(
|
|||
f"{purpose_summary.category_counts.get('tls_server_only', 0)} certificates are ordinary public TLS server certificates, while {purpose_summary.category_counts.get('tls_server_and_client', 0)} come from templates that also permit client-certificate use.",
|
||||
f"{historical_count} historical leaf certificates show how the same names evolved over time.",
|
||||
f"{len(report['unique_dns_names'])} DNS SAN names were scanned live.",
|
||||
"The estate is best understood as layers of branding, service naming, platform naming, and delivery naming rather than as random clutter.",
|
||||
f"{caa_analysis.total_names} DNS names were also assessed for effective CAA policy, revealing where issuance is centrally governed, delegated, or left unrestricted.",
|
||||
"The estate is best understood as layers of branding, service naming, platform naming, delivery naming, and issuance-policy control rather than as random clutter.",
|
||||
]
|
||||
)
|
||||
lines.append(
|
||||
|
|
@ -1428,8 +1679,9 @@ def render_latex(
|
|||
"Chapters 2 and 3 explain what the current certificates are and what they are for.",
|
||||
"Chapter 4 explains the historical lifecycle and splits red flags into current versus fixed-in-the-past.",
|
||||
"Chapters 5 and 6 explain naming and DNS delivery.",
|
||||
"Chapter 7 explains the issuance-policy layer: which public CAs DNS currently authorizes and where DNS imposes no restriction at all.",
|
||||
*(
|
||||
["Chapter 7 explains the focused Subject-CN cohort and why it behaves differently from the wider estate."]
|
||||
["Chapter 8 explains the focused Subject-CN cohort and why it behaves differently from the wider estate."]
|
||||
if has_focus
|
||||
else []
|
||||
),
|
||||
|
|
@ -1758,6 +2010,117 @@ def render_latex(
|
|||
lines.append(
|
||||
r"The glossary terms above are the building blocks used in the DNS-outcome table. This is also why the management summary mentions Adobe Campaign, CloudFront, Apigee, and Pega at all: not because brand names are the point, but because those names reveal what kind of public delivery role a hostname is landing on. CloudFront suggests a distribution edge, Apigee suggests managed API exposure, Adobe Campaign suggests a marketing or communications front, and a load balancer suggests traffic distribution to backend services."
|
||||
)
|
||||
lines.append(
|
||||
r"The next chapter keeps the same names in view but asks a different question: not where the names land, but which public CA families DNS currently authorizes to issue for them."
|
||||
)
|
||||
|
||||
lines.append(r"\section{DNS Issuance Policy Control (CAA)}")
|
||||
zone_summary_items: list[str] = []
|
||||
for zone in caa_analysis.configured_domains:
|
||||
zone_rows = ct_caa_analysis.rows_for_zone(caa_analysis, zone)
|
||||
unrestricted_count = sum(1 for row in zone_rows if not row.allowed_ca_families)
|
||||
mismatch_count = sum(1 for row in zone_rows if row.current_policy_mismatch)
|
||||
overlap_count = sum(1 for row in zone_rows if row.current_multi_family_overlap)
|
||||
dominant_policy = ct_caa_analysis.policy_counter(zone_rows).most_common(1)
|
||||
dominant_label = caa_policy_label(dominant_policy[0][0]) if dominant_policy else "none"
|
||||
zone_summary_items.append(
|
||||
f"{zone}: {len(zone_rows)} names in scope; dominant policy is {dominant_label}; unrestricted names={unrestricted_count}; current policy-mismatch names={mismatch_count}; current multi-family overlap names={overlap_count}."
|
||||
)
|
||||
add_summary(
|
||||
zone_summary_items
|
||||
+ [
|
||||
f"Effective CAA discovery paths across all names are {', '.join(f'{caa_source_label(kind)}={count}' for kind, count in caa_analysis.source_kind_counts.most_common())}.",
|
||||
f"Current names simultaneously covered by more than one live CA family: {len(caa_analysis.multi_family_overlap_names)}.",
|
||||
f"Current names whose live certificate family does not match today's published CAA policy: {len(caa_analysis.policy_mismatch_names)}.",
|
||||
]
|
||||
)
|
||||
lines.append(
|
||||
r"CAA is the DNS control layer for public certificate issuance. It does not validate a certificate after issuance; instead, it tells a public CA which CA families are authorized to issue for a DNS name if any restriction is published at all. If no CAA is published, WebPKI issuance is unrestricted from the DNS-policy point of view."
|
||||
)
|
||||
lines.append(
|
||||
r"This chapter adds the missing control dimension to the earlier chapters. The certificate chapter showed who actually issued. The DNS chapter showed where the names land. The CAA chapter shows which issuers are supposed to be allowed by DNS policy."
|
||||
)
|
||||
lines.append(
|
||||
r"CAA is checked per DNS name requested in the certificate, not per Subject DN and not per organisational story. A Subject CN can therefore shift between different Subject DN values without creating a CAA clash, because CAA ignores organisation fields and looks only at the DNS names being certified."
|
||||
)
|
||||
lines.extend(
|
||||
[
|
||||
r"\subsection{How To Read The CAA Results}",
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.24\linewidth} >{\raggedleft\arraybackslash}p{0.10\linewidth} >{\raggedright\arraybackslash}p{0.54\linewidth}}",
|
||||
r"\toprule",
|
||||
r"CAA Discovery Result & Names & Meaning \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for label, count, meaning in caa_source_rows(caa_analysis):
|
||||
lines.append(rf"{latex_escape(label)} & {latex_escape(count)} & {latex_escape(meaning)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
lines.append(
|
||||
r"The key distinction is between ordinary parent inheritance and alias-target-derived policy. Parent inheritance means the leaf name simply relies on a policy published higher in its own DNS tree. Alias-target-derived policy means the effective CAA surfaced through an alias response. In this corpus, that often marks a managed rail or specialist external platform rather than a plain brand-front hostname."
|
||||
)
|
||||
lines.append(r"\subsection{Policy Regimes By Configured Zone}")
|
||||
for zone in caa_analysis.configured_domains:
|
||||
lines.append(rf"\subsubsection{{{latex_escape(zone)}}}")
|
||||
lines.extend(
|
||||
[
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.25\linewidth} >{\raggedleft\arraybackslash}p{0.10\linewidth} >{\raggedright\arraybackslash}p{0.53\linewidth}}",
|
||||
r"\toprule",
|
||||
r"Policy Regime & Names & Plain-Language Meaning \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for regime, count, meaning in caa_zone_rows[zone]:
|
||||
lines.append(rf"{latex_escape(regime)} & {latex_escape(count)} & {latex_escape(meaning)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
if secondary_zone:
|
||||
lines.append(
|
||||
rf"The contrast between \texttt{{{latex_escape(primary_zone)}}} and \texttt{{{latex_escape(secondary_zone)}}} is one of the strongest PKI-governance findings in the corpus. \texttt{{{latex_escape(primary_zone)}}} is policy-layered and governed, while \texttt{{{latex_escape(secondary_zone)}}} is currently CAA-empty in the scanned name set. That does not make \texttt{{{latex_escape(secondary_zone)}}} invalid, but it does mean DNS is not constraining public CA choice there."
|
||||
)
|
||||
lines.extend(
|
||||
[
|
||||
r"\subsection{What The CAA Layer Does To The Earlier Thesis}",
|
||||
r"The CAA layer strengthens the earlier certificate-and-DNS thesis rather than overturning it. The same service families that already looked like shared managed rails from naming and DNS often sit under narrower issuance policy as well.",
|
||||
rf"In \texttt{{{latex_escape(primary_zone)}}}, the current CAA friction is concentrated rather than diffuse: {latex_escape(caa_concentration_text(caa_analysis, primary_zone))}.",
|
||||
r"Broad corporate default policy remains visible on many ordinary brand-facing names. That supports the earlier reading that not every public hostname was moved onto one tightly managed delivery rail.",
|
||||
r"Vendor-style exceptions still exist. Where a name resolves through a specialist external platform and the allowed CA set widens or changes shape, the policy layer supports the earlier vendor-delegation reading rather than contradicting it.",
|
||||
r"\subsection{Current Multi-Family Overlap}",
|
||||
]
|
||||
)
|
||||
if caa_analysis.multi_family_overlap_names:
|
||||
lines.extend(
|
||||
[
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.29\linewidth} >{\raggedright\arraybackslash}p{0.14\linewidth} >{\raggedright\arraybackslash}p{0.17\linewidth} >{\raggedright\arraybackslash}p{0.28\linewidth}}",
|
||||
r"\toprule",
|
||||
r"DNS Name & Zone & Live CA Families & Covering Subject CNs \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for name, zone, families, subjects in top_caa_overlap_rows(caa_analysis):
|
||||
lines.append(rf"{latex_escape(name)} & {latex_escape(zone)} & {latex_escape(families)} & {latex_escape(subjects)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
else:
|
||||
lines.append(r"No current multi-family overlap names were found.")
|
||||
lines.append(
|
||||
r"These overlap names are operationally important. They show where the same public DNS name is currently covered by more than one live CA family at once. In this corpus, that behavior clusters tightly in a few service families rather than being spread randomly across the estate."
|
||||
)
|
||||
lines.append(r"\subsection{Current Policy Mismatch}")
|
||||
if caa_analysis.policy_mismatch_names:
|
||||
lines.extend(
|
||||
[
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.27\linewidth} >{\raggedright\arraybackslash}p{0.12\linewidth} >{\raggedright\arraybackslash}p{0.16\linewidth} >{\raggedright\arraybackslash}p{0.18\linewidth} >{\raggedright\arraybackslash}p{0.17\linewidth}}",
|
||||
r"\toprule",
|
||||
r"DNS Name & Zone & Live CA Families & CAA-Allowed Families & CAA Discovery Result \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for name, zone, families, allowed, result in top_caa_mismatch_rows(caa_analysis):
|
||||
lines.append(rf"{latex_escape(name)} & {latex_escape(zone)} & {latex_escape(families)} & {latex_escape(allowed)} & {latex_escape(result)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
else:
|
||||
lines.append(r"No current policy-mismatch names were found.")
|
||||
lines.append(
|
||||
r"A current policy mismatch does not automatically prove CA misissuance. CAA only proves what DNS authorizes now. Certificates can remain valid after the DNS-side policy has changed, so the right reading here is current policy lag or migration residue unless the historical issuance-time DNS can also be shown."
|
||||
)
|
||||
|
||||
if focus_analysis:
|
||||
lines.append(r"\section{Focused Subject-CN Cohort}")
|
||||
|
|
@ -1864,27 +2227,31 @@ def render_latex(
|
|||
lines.append(r"\section{Making The Whole Estate Make Sense}")
|
||||
add_summary(
|
||||
[
|
||||
"Certificates explain trust, naming, and purpose. DNS explains routing and delivery.",
|
||||
"Certificates, DNS, and CAA explain trust, routing, delivery, and issuance control.",
|
||||
"Clean public names usually sit closest to the customer-facing surface.",
|
||||
"Dense SAN sets, numbered families, and multi-zone certificates tend to expose the platform layer beneath the brand layer.",
|
||||
"The overall pattern is more consistent with a federated operating model than with random hostname sprawl.",
|
||||
"Dense SAN sets, numbered families, multi-zone certificates, and narrower CAA policy tend to expose the platform layer beneath the brand layer.",
|
||||
"The overall pattern is more consistent with a federated operating model with uneven governance maturity than with random hostname sprawl.",
|
||||
]
|
||||
)
|
||||
lines.append(
|
||||
r"The apparent arbitrariness is not best explained as disorder. It is better explained as the visible overlap of multiple valid naming systems created by different functions: brand presentation, service design, operational delivery, and gradual migration."
|
||||
r"The apparent arbitrariness is not best explained as disorder. It is better explained as the visible overlap of multiple valid naming systems created by different functions: brand presentation, service design, operational delivery, issuance control, and gradual migration."
|
||||
)
|
||||
lines.append(
|
||||
r"The new CAA evidence sharpens that point rather than contradicting it. The same families that looked like managed rails from certificate naming and DNS landing often sit under narrower issuance policy as well. The main qualification is that governance is uneven. One configured public zone is policy-layered and governed, while another remains CAA-empty. That is not random chaos, but it is also not uniform control maturity."
|
||||
)
|
||||
|
||||
lines.append(r"\section{Limits, Confidence, and Noise}")
|
||||
add_summary(
|
||||
[
|
||||
"High-confidence claims are tied directly to certificate fields, DNS answers, and live trust records.",
|
||||
"High-confidence claims are tied directly to certificate fields, DNS answers, live trust records, and current CAA policy.",
|
||||
"Medium-confidence claims are organisational readings drawn from repeated technical patterns.",
|
||||
"Lower-confidence claims are exact expansions of abbreviations and exact ownership boundaries inferred from names alone.",
|
||||
"A public NXDOMAIN today does not automatically contradict a valid certificate because DNS and certificate lifecycles move on different clocks.",
|
||||
"A current CAA mismatch does not by itself prove historical CA non-compliance, because DNS policy may have changed after issuance.",
|
||||
]
|
||||
)
|
||||
lines.append(
|
||||
r"A useful way to read the corpus is to separate signal from noise. Repeated naming schemas are signal. Repeated DNS outcomes are signal. Which public CA family keeps issuing a name is signal. Simple \texttt{www} presence or absence is weak evidence either way unless it coincides with stronger differences such as distinct DNS routing, distinct SAN composition, or a distinct certificate renewal history."
|
||||
r"A useful way to read the corpus is to separate signal from noise. Repeated naming schemas are signal. Repeated DNS outcomes are signal. Which public CA family keeps issuing a name is signal. Where CAA is broad, narrow, delegated, or absent is signal. Simple \texttt{www} presence or absence is weak evidence either way unless it coincides with stronger differences such as distinct DNS routing, distinct SAN composition, a distinct certificate renewal history, or a distinct issuance-policy shape."
|
||||
)
|
||||
|
||||
lines.extend(
|
||||
|
|
@ -2113,11 +2480,70 @@ def render_latex(
|
|||
else:
|
||||
lines.append(r"No step weeks met the threshold.")
|
||||
|
||||
lines.extend(
|
||||
[
|
||||
r"\section{CAA Policy Detail}",
|
||||
r"This appendix keeps the issuance-policy evidence inside the monograph. It answers a narrower question than the DNS appendix: not where a name lands, but which public CA families DNS currently authorizes to issue for that name.",
|
||||
r"\subsection{CAA Discovery Paths}",
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.24\linewidth} >{\raggedleft\arraybackslash}p{0.10\linewidth} >{\raggedright\arraybackslash}p{0.54\linewidth}}",
|
||||
r"\toprule",
|
||||
r"CAA Discovery Result & Names & Meaning \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for label, count, meaning in caa_source_rows(caa_analysis):
|
||||
lines.append(rf"{latex_escape(label)} & {latex_escape(count)} & {latex_escape(meaning)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
lines.append(r"\subsection{Policy Regimes By Configured Zone}")
|
||||
for zone in caa_analysis.configured_domains:
|
||||
lines.append(rf"\subsubsection{{{latex_escape(zone)}}}")
|
||||
lines.extend(
|
||||
[
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.25\linewidth} >{\raggedleft\arraybackslash}p{0.10\linewidth} >{\raggedright\arraybackslash}p{0.53\linewidth}}",
|
||||
r"\toprule",
|
||||
r"Policy Regime & Names & Plain-Language Meaning \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for regime, count, meaning in caa_zone_rows[zone]:
|
||||
lines.append(rf"{latex_escape(regime)} & {latex_escape(count)} & {latex_escape(meaning)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
lines.append(r"\subsection{Current Multi-Family Overlap}")
|
||||
if caa_analysis.multi_family_overlap_names:
|
||||
lines.extend(
|
||||
[
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.29\linewidth} >{\raggedright\arraybackslash}p{0.14\linewidth} >{\raggedright\arraybackslash}p{0.17\linewidth} >{\raggedright\arraybackslash}p{0.28\linewidth}}",
|
||||
r"\toprule",
|
||||
r"DNS Name & Zone & Live CA Families & Covering Subject CNs \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for name, zone, families, subjects in top_caa_overlap_rows(caa_analysis, 40):
|
||||
lines.append(rf"{latex_escape(name)} & {latex_escape(zone)} & {latex_escape(families)} & {latex_escape(subjects)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
else:
|
||||
lines.append(r"No current multi-family overlap names were found.")
|
||||
lines.append(r"\subsection{Current Policy Mismatch}")
|
||||
if caa_analysis.policy_mismatch_names:
|
||||
lines.extend(
|
||||
[
|
||||
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.27\linewidth} >{\raggedright\arraybackslash}p{0.12\linewidth} >{\raggedright\arraybackslash}p{0.16\linewidth} >{\raggedright\arraybackslash}p{0.18\linewidth} >{\raggedright\arraybackslash}p{0.17\linewidth}}",
|
||||
r"\toprule",
|
||||
r"DNS Name & Zone & Live CA Families & CAA-Allowed Families & CAA Discovery Result \\",
|
||||
r"\midrule",
|
||||
]
|
||||
)
|
||||
for name, zone, families, allowed, result in top_caa_mismatch_rows(caa_analysis, 40):
|
||||
lines.append(rf"{latex_escape(name)} & {latex_escape(zone)} & {latex_escape(families)} & {latex_escape(allowed)} & {latex_escape(result)} \\")
|
||||
lines.extend([r"\bottomrule", r"\end{longtable}"])
|
||||
else:
|
||||
lines.append(r"No current policy-mismatch names were found.")
|
||||
|
||||
if focus_analysis:
|
||||
lines.extend(
|
||||
[
|
||||
r"\section{Focused Subject-CN Detail}",
|
||||
r"This appendix keeps the complete focused-cohort table inside the monograph, but it now follows the three-bucket taxonomy from Chapter 7. That makes it easier to read the cohort as a set of related naming traditions instead of as one flat mixed list.",
|
||||
r"This appendix keeps the complete focused-cohort table inside the monograph, but it now follows the three-bucket taxonomy from Chapter 8. That makes it easier to read the cohort as a set of related naming traditions instead of as one flat mixed list.",
|
||||
]
|
||||
)
|
||||
appendix_buckets = [
|
||||
|
|
@ -2163,6 +2589,12 @@ def main() -> int:
|
|||
args = parse_args()
|
||||
report = ct_master_report.summarize_for_report(args)
|
||||
assessment = ct_lineage_report.build_assessment(build_history_args(args))
|
||||
caa_analysis = ct_caa_analysis.build_analysis(
|
||||
report["hits"],
|
||||
report["domains"],
|
||||
args.caa_cache_dir,
|
||||
args.caa_cache_ttl_seconds,
|
||||
)
|
||||
focus_subjects = ct_focus_subjects.load_focus_subjects(args.focus_subjects_file)
|
||||
focus_analysis = ct_focus_subjects.build_analysis(
|
||||
focus_subjects,
|
||||
|
|
@ -2172,8 +2604,8 @@ def main() -> int:
|
|||
args.dns_cache_ttl_seconds,
|
||||
)
|
||||
render_appendix_inventory(args, report)
|
||||
render_markdown(args, report, assessment, focus_analysis)
|
||||
render_latex(args, report, assessment, focus_analysis)
|
||||
render_markdown(args, report, assessment, caa_analysis, focus_analysis)
|
||||
render_latex(args, report, assessment, caa_analysis, focus_analysis)
|
||||
if not args.skip_pdf:
|
||||
ct_scan.compile_latex_to_pdf(args.latex_output, args.pdf_output, args.pdf_engine)
|
||||
if not args.quiet:
|
||||
|
|
|
|||
Loading…
Reference in a new issue