Rebalance EKU analysis in monograph

This commit is contained in:
saymrwulf 2026-03-29 13:01:02 +02:00
parent adf6e950ae
commit ba0ef192a9

View file

@ -97,6 +97,43 @@ def short_issuer(issuer_name: str) -> str:
return issuer_name
def pct(count: int, total: int) -> str:
if total <= 0:
return "0.0%"
return f"{(count / total) * 100:.1f}%"
def purpose_label(category: str) -> str:
return {
"tls_server_only": "TLS server only",
"tls_server_and_client": "TLS server and client auth",
"client_auth_only": "Client auth only",
"smime_only": "S/MIME only",
"code_signing_only": "Code signing only",
"mixed_or_other": "Mixed or other",
"no_eku": "No EKU",
}.get(category, category)
def purpose_meaning(category: str) -> str:
return {
"tls_server_only": "Standard public website or API endpoint certificate.",
"tls_server_and_client": "Server certificate whose EKU also permits client-certificate use.",
"client_auth_only": "Identity-style certificate for a person, robot, or agent in mTLS.",
"smime_only": "Email-signing or email-encryption certificate.",
"code_signing_only": "Software-signing certificate rather than a web-endpoint certificate.",
"mixed_or_other": "Unusual or mixed EKU combination requiring case-by-case review.",
"no_eku": "Certificate without an Extended Key Usage extension.",
}.get(category, "Certificate purpose category.")
def collapse_issuer_counts_by_family(issuer_counts: dict[str, int]) -> Counter[str]:
families: Counter[str] = Counter()
for issuer_name, count in issuer_counts.items():
families[short_issuer(issuer_name)] += count
return families
def build_issuer_family_rows(report: dict[str, object]) -> list[dict[str, str]]:
issuer_trust = report["issuer_trust"]
families: dict[str, dict[str, object]] = {}
@ -142,8 +179,39 @@ def render_markdown(args: argparse.Namespace, report: dict[str, object]) -> None
hits = report["hits"]
groups = report["groups"]
purpose_summary = report["purpose_summary"]
total_certificates = len(report["classifications"])
dual_items = [item for item in report["classifications"] if item.category == "tls_server_and_client"]
dual_issuer_counts = Counter(short_issuer(item.issuer_name) for item in dual_items)
server_only_count = purpose_summary.category_counts.get("tls_server_only", 0)
dual_count = purpose_summary.category_counts.get("tls_server_and_client", 0)
server_only_issuer_families = collapse_issuer_counts_by_family(
purpose_summary.issuer_breakdown.get("tls_server_only", {})
)
purpose_rows = [
[
purpose_label(category),
str(count),
pct(count, total_certificates),
purpose_meaning(category),
]
for category, count in [
("tls_server_only", purpose_summary.category_counts.get("tls_server_only", 0)),
("tls_server_and_client", purpose_summary.category_counts.get("tls_server_and_client", 0)),
("client_auth_only", purpose_summary.category_counts.get("client_auth_only", 0)),
("smime_only", purpose_summary.category_counts.get("smime_only", 0)),
("code_signing_only", purpose_summary.category_counts.get("code_signing_only", 0)),
("mixed_or_other", purpose_summary.category_counts.get("mixed_or_other", 0)),
("no_eku", purpose_summary.category_counts.get("no_eku", 0)),
]
]
eku_template_rows = [
[template, str(count), pct(count, total_certificates)]
for template, count in purpose_summary.eku_templates.items()
]
key_usage_rows = [
[template, str(count), pct(count, total_certificates)]
for template, count in purpose_summary.key_usage_templates.items()
]
issuer_rows = [
[
row["family"],
@ -267,6 +335,12 @@ def render_markdown(args: argparse.Namespace, report: dict[str, object]) -> None
lines.append("")
lines.append("This chapter addresses a key ambiguity. A certificate can be technically valid for several uses. The corpus was therefore assessed from the actual EKU and KeyUsage fields, not from the hostname style alone.")
lines.append("")
lines.append("### Purpose Map")
lines.append("")
lines.extend(md_table(["Usage Class", "Certificates", "Share", "Meaning"], purpose_rows))
lines.append("")
lines.append("The basic picture is simple: the corpus is overwhelmingly made of ordinary public TLS server certificates, with a smaller minority whose EKU also permits client-certificate use.")
lines.append("")
lines.append("**Plain-language explanation of the usage categories**")
lines.append("")
lines.extend(
@ -281,26 +355,55 @@ def render_markdown(args: argparse.Namespace, report: dict[str, object]) -> None
lines.append("")
lines.append("The result is clean. This corpus is entirely TLS-capable. There is no evidence of a separate S/MIME or code-signing estate, and there are no client-auth-only certificates.")
lines.append("")
lines.append("### EKU and KeyUsage Templates")
lines.append("")
lines.append("At the template level, the corpus is even simpler than the certificate count suggests. Only two EKU templates appear at all, and one KeyUsage template dominates almost completely.")
lines.append("")
lines.extend(md_table(["EKU Template", "Certificates", "Share"], eku_template_rows))
lines.append("")
lines.extend(md_table(["KeyUsage Template", "Certificates", "Share"], key_usage_rows))
lines.append("")
lines.append("### The Majority Pattern: Server-Only Public TLS")
lines.append("")
lines.extend(
[
f"- Server-only certificates account for {server_only_count} of {total_certificates} certificates, or {pct(server_only_count, total_certificates)} of the corpus.",
f"- Server-only validity starts are split between {', '.join(f'{year} ({count})' for year, count in purpose_summary.validity_start_years.get('tls_server_only', {}).items())}.",
f"- Server-only issuer-family concentration: {', '.join(f'{name} ({count})' for name, count in server_only_issuer_families.most_common())}.",
"- This is the normal public WebPKI server-certificate pattern for websites, APIs, and edge service front doors.",
]
)
lines.append("")
lines.append("This majority bucket is not background noise. It is the main operational reality visible in the scan: public DNS names covered by publicly trusted endpoint certificates.")
lines.append("")
if dual_rows:
lines.append("### What Dual EKU Means")
lines.append("### The Minority Pattern: Dual EKU")
lines.append("")
lines.append("EKU means *allowed purpose*, not *observed real-world use*. A dual-EKU certificate is a certificate whose X.509 policy says it may be used both as a TLS server certificate and as a TLS client certificate.")
lines.append("")
lines.extend(
[
f"- Dual-EKU certificates in this corpus: {len(dual_items)}.",
f"- Dual-EKU certificates in this corpus: {dual_count}, or {pct(dual_count, total_certificates)} of the corpus.",
f"- Issuer-family concentration inside the dual-EKU bucket: {', '.join(f'{name} ({count})' for name, count in dual_issuer_counts.most_common())}.",
f"- Dual-EKU Subject CN families that also have a strict server-only sibling: {len(purpose_summary.dual_eku_subject_cns_with_server_only_sibling)}.",
f"- Dual-EKU Subject CN families that appear only in the dual-EKU bucket: {len(purpose_summary.dual_eku_subject_cns_without_server_only_sibling)}.",
f"- Dual-EKU validity starts are split between {', '.join(f'{year} ({count})' for year, count in purpose_summary.validity_start_years.get('tls_server_and_client', {}).items())}.",
]
)
lines.append("")
lines.append("The important interpretation point is this: these still look like public hostname certificates, not person or robot identity certificates. They have DNS-style Subject CN values, DNS SAN lists, and public WebPKI issuers. The best reading is therefore not 'this is a separate client-certificate estate', but rather 'some server certificates were issued from a template that also allowed clientAuth'.")
lines.append("")
lines.append("### Full Dual-EKU Certificate Catalogue")
lines.append("")
lines.extend(md_table(["Subject CN", "Valid From", "Valid To", "Issuer", "DNS SANs"], dual_rows))
lines.append("")
lines.append("### What Is Not Present")
lines.append("")
lines.extend(
[
"- There are no client-auth-only certificates in the corpus.",
"- There are no S/MIME certificates in the corpus.",
"- There are no code-signing certificates in the corpus.",
"- There are no mixed-or-other EKU combinations and no certificates missing EKU entirely.",
]
)
lines.append("")
lines.append("## Chapter 4: Naming Architecture")
lines.append("")
lines.append("**Management Summary**")
@ -388,7 +491,14 @@ def render_markdown(args: argparse.Namespace, report: dict[str, object]) -> None
lines.append("")
lines.extend(md_table(["ID", "Basis", "Type", "Certs", "CNs", "Top Stacks"], family_rows))
lines.append("")
lines.append("## Appendix B: Detailed Inventory Appendix")
if dual_rows:
lines.append("## Appendix B: Detailed Dual-EKU Catalogue")
lines.append("")
lines.append("This appendix keeps the complete dual-EKU evidence available without letting the minority case dominate the main analytical chapter.")
lines.append("")
lines.extend(md_table(["Subject CN", "Valid From", "Valid To", "Issuer", "DNS SANs"], dual_rows))
lines.append("")
lines.append("## Appendix C: Detailed Inventory Appendix")
lines.append("")
lines.append("The full issuer-first family inventory is reproduced below so that the monograph remains complete rather than merely interpretive.")
lines.append("")
@ -401,9 +511,33 @@ def render_latex(args: argparse.Namespace, report: dict[str, object]) -> None:
hits = report["hits"]
groups = report["groups"]
purpose_summary = report["purpose_summary"]
total_certificates = len(report["classifications"])
issuer_trust = report["issuer_trust"]
issuer_family_rows = build_issuer_family_rows(report)
dual_items = [item for item in report["classifications"] if item.category == "tls_server_and_client"]
dual_issuer_counts = Counter(short_issuer(item.issuer_name) for item in dual_items)
server_only_count = purpose_summary.category_counts.get("tls_server_only", 0)
dual_count = purpose_summary.category_counts.get("tls_server_and_client", 0)
server_only_issuer_families = collapse_issuer_counts_by_family(
purpose_summary.issuer_breakdown.get("tls_server_only", {})
)
purpose_rows = [
(
purpose_label(category),
str(count),
pct(count, total_certificates),
purpose_meaning(category),
)
for category, count in [
("tls_server_only", purpose_summary.category_counts.get("tls_server_only", 0)),
("tls_server_and_client", purpose_summary.category_counts.get("tls_server_and_client", 0)),
("client_auth_only", purpose_summary.category_counts.get("client_auth_only", 0)),
("smime_only", purpose_summary.category_counts.get("smime_only", 0)),
("code_signing_only", purpose_summary.category_counts.get("code_signing_only", 0)),
("mixed_or_other", purpose_summary.category_counts.get("mixed_or_other", 0)),
("no_eku", purpose_summary.category_counts.get("no_eku", 0)),
]
]
appendix_pdf_path = args.appendix_pdf_output.resolve().as_posix()
lines: list[str] = [
r"\documentclass[11pt]{article}",
@ -550,26 +684,69 @@ def render_latex(args: argparse.Namespace, report: dict[str, object]) -> None:
lines.append(
r"Extended Key Usage tells software what a certificate is allowed to do. In plain terms, this is the difference between a website certificate, a client-identity certificate, an email certificate, and a code-signing certificate."
)
dual_issuer_counts = Counter(short_issuer(item.issuer_name) for item in dual_items)
lines.extend(
[
r"\subsection{What Dual EKU Means}",
rf"In this corpus, {purpose_summary.category_counts.get('tls_server_and_client', 0)} certificates carry both \texttt{{serverAuth}} and \texttt{{clientAuth}} in Extended Key Usage. That means the certificate is \emph{{allowed}} to be used in either role. It does not prove that the certificate is actually being used as a client identity in production.",
rf"The dual-EKU bucket is concentrated in these issuer families: {latex_escape(', '.join(f'{name} ({count})' for name, count in dual_issuer_counts.most_common()))}.",
rf"{len(purpose_summary.dual_eku_subject_cns_with_server_only_sibling)} dual-EKU Subject-CN families also have a strict server-only sibling, while {len(purpose_summary.dual_eku_subject_cns_without_server_only_sibling)} currently appear only in the dual-EKU bucket.",
r"The important interpretation point is that these still look like public hostname certificates: DNS-style Subject CN values, DNS SAN lists, and public WebPKI issuers. The better reading is therefore not ``separate client-certificate estate'', but ``server certificates issued from a template that also allowed clientAuth''.",
r"\subsection{Dual-EKU Catalogue}",
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.38\linewidth} >{\raggedright\arraybackslash}p{0.12\linewidth} >{\raggedright\arraybackslash}p{0.12\linewidth} >{\raggedright\arraybackslash}p{0.18\linewidth} >{\raggedleft\arraybackslash}p{0.08\linewidth}}",
r"\subsection{Purpose Map}",
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.24\linewidth} >{\raggedleft\arraybackslash}p{0.10\linewidth} >{\raggedleft\arraybackslash}p{0.10\linewidth} >{\raggedright\arraybackslash}p{0.46\linewidth}}",
r"\toprule",
r"Subject CN & Valid From & Valid To & Issuer & DNS SANs \\",
r"Usage Class & Certs & Share & Meaning \\",
r"\midrule",
]
)
for item in dual_items:
for label, count, share, meaning in purpose_rows:
lines.append(
rf"{latex_escape(item.subject_cn)} & {latex_escape(item.valid_from_utc[:10])} & {latex_escape(item.valid_to_utc[:10])} & {latex_escape(short_issuer(item.issuer_name))} & {len(item.san_dns_names)} \\"
rf"{latex_escape(label)} & {count} & {latex_escape(share)} & {latex_escape(meaning)} \\"
)
lines.extend([r"\bottomrule", r"\end{longtable}"])
lines.append(
r"The basic picture is simple: the corpus is overwhelmingly made of ordinary public TLS server certificates, with a smaller minority whose EKU also permits client-certificate use."
)
lines.extend(
[
r"\subsection{EKU and KeyUsage Templates}",
r"At the template level, the corpus is even simpler than the certificate count suggests. Only two EKU templates appear at all, and one KeyUsage template dominates almost completely.",
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.58\linewidth} >{\raggedleft\arraybackslash}p{0.14\linewidth} >{\raggedleft\arraybackslash}p{0.14\linewidth}}",
r"\toprule",
r"EKU Template & Certs & Share \\",
r"\midrule",
]
)
for template, count in purpose_summary.eku_templates.items():
lines.append(rf"{latex_escape(template)} & {count} & {latex_escape(pct(count, total_certificates))} \\")
lines.extend([r"\bottomrule", r"\end{longtable}"])
lines.extend(
[
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.58\linewidth} >{\raggedleft\arraybackslash}p{0.14\linewidth} >{\raggedleft\arraybackslash}p{0.14\linewidth}}",
r"\toprule",
r"KeyUsage Template & Certs & Share \\",
r"\midrule",
]
)
for template, count in purpose_summary.key_usage_templates.items():
lines.append(rf"{latex_escape(template)} & {count} & {latex_escape(pct(count, total_certificates))} \\")
lines.extend([r"\bottomrule", r"\end{longtable}"])
lines.extend(
[
r"\subsection{The Majority Pattern: Server-Only Public TLS}",
rf"Server-only certificates account for {server_only_count} of {total_certificates} certificates, or {latex_escape(pct(server_only_count, total_certificates))} of the corpus.",
rf"Server-only validity starts are split between {latex_escape(', '.join(f'{year} ({count})' for year, count in purpose_summary.validity_start_years.get('tls_server_only', {}).items()))}.",
rf"Server-only issuer-family concentration is {latex_escape(', '.join(f'{name} ({count})' for name, count in server_only_issuer_families.most_common()))}.",
r"This is the normal public WebPKI server-certificate pattern for websites, APIs, and edge service front doors.",
r"This majority bucket is not background noise. It is the main operational reality visible in the scan: public DNS names covered by publicly trusted endpoint certificates.",
]
)
lines.extend(
[
r"\subsection{The Minority Pattern: Dual EKU}",
rf"In this corpus, {dual_count} certificates carry both \texttt{{serverAuth}} and \texttt{{clientAuth}} in Extended Key Usage. That is {latex_escape(pct(dual_count, total_certificates))} of the corpus. This means the certificate is \emph{{allowed}} to be used in either role. It does not prove that the certificate is actually being used as a client identity in production.",
rf"The dual-EKU bucket is concentrated in these issuer families: {latex_escape(', '.join(f'{name} ({count})' for name, count in dual_issuer_counts.most_common()))}.",
rf"{len(purpose_summary.dual_eku_subject_cns_with_server_only_sibling)} dual-EKU Subject-CN families also have a strict server-only sibling, while {len(purpose_summary.dual_eku_subject_cns_without_server_only_sibling)} currently appear only in the dual-EKU bucket.",
rf"Dual-EKU validity starts are split between {latex_escape(', '.join(f'{year} ({count})' for year, count in purpose_summary.validity_start_years.get('tls_server_and_client', {}).items()))}.",
r"The important interpretation point is that these still look like public hostname certificates: DNS-style Subject CN values, DNS SAN lists, and public WebPKI issuers. The better reading is therefore not ``separate client-certificate estate'', but ``server certificates issued from a template that also allowed clientAuth''.",
r"\subsection{What Is Not Present}",
r"There are no client-auth-only certificates, no S/MIME certificates, no code-signing certificates, no mixed-or-other EKU combinations, and no certificates missing EKU entirely.",
]
)
lines.append(r"\section{Naming Architecture}")
add_summary(
@ -666,6 +843,23 @@ def render_latex(args: argparse.Namespace, report: dict[str, object]) -> None:
)
lines.extend([r"\bottomrule", r"\end{longtable}"])
if dual_items:
lines.extend(
[
r"\section{Detailed Dual-EKU Catalogue}",
r"This appendix keeps the complete dual-EKU evidence available without letting the minority case dominate the main analytical chapter.",
r"\begin{longtable}{>{\raggedright\arraybackslash}p{0.38\linewidth} >{\raggedright\arraybackslash}p{0.12\linewidth} >{\raggedright\arraybackslash}p{0.12\linewidth} >{\raggedright\arraybackslash}p{0.18\linewidth} >{\raggedleft\arraybackslash}p{0.08\linewidth}}",
r"\toprule",
r"Subject CN & Valid From & Valid To & Issuer & DNS SANs \\",
r"\midrule",
]
)
for item in dual_items:
lines.append(
rf"{latex_escape(item.subject_cn)} & {latex_escape(item.valid_from_utc[:10])} & {latex_escape(item.valid_to_utc[:10])} & {latex_escape(short_issuer(item.issuer_name))} & {len(item.san_dns_names)} \\"
)
lines.extend([r"\bottomrule", r"\end{longtable}"])
lines.extend(
[
r"\section{Detailed Inventory Appendix}",