A new C++ back end for ocamlc
Article URL: https://github.com/ocaml/ocaml/pull/14701 Comments URL: https://news.ycombinator.com/item?id=47608058 Points: 178 # Comments: 15
This patch adds a new C++ backend to ocamlc, improving on the unincremented C currently in use by the runtime and FFI. As an example, here's a simple program that computes the prime numbers up to a user-specified limit:
module List = struct let rec filter p = function | [] -> [] | x :: l -> if p x then x :: filter p l else filter p l let rec init i last f = if i > last then [] else f i :: init (i+1) last f end
let primes n = let rec sieve candidates = match candidates with | [] -> [] | p :: ps -> p :: sieve (List.filter (fun n -> n mod p <> 0) ps) in sieve (List.init 2 n (fun i -> i))
let main ~limit = primes limit
You can compile this program to C++ using:
ocamlc -incr-c primes.ml
which produces primes.cpp, containing your program translated to idiomatic, readable C++ code:
Generated C++ code in primes.cpp
#ifndef limit #error "Parameter limit missing" #include #endif template struct Cons; template struct Cons_; template struct I{ static constexpr int tag = 1000; static constexpr bool nonzero = ((n) != (0)); static constexpr int val = n; }; struct EXCEPTION{ }; template struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template struct ifthenelse; template struct ifthenelse{ typedef Cons::res::template app::res> res; }; template struct ifthenelse{ typedef typename filter::template app::res::template app::res res; }; template struct ifthenelse_; template struct ifthenelse_{ typedef typename param::f1 l; typedef typename param::f0 x; typedef typename ifthenelse::res::nonzero>::res res; }; template struct ifthenelse_{ typedef I<0> res; }; template struct ifthenelse_2; template struct ifthenelse_2{ typedef I<0> res; }; template struct ifthenelse_2{ typedef Cons::res, typename init::template app::val))>>::res::template app::res::template app::res> res; }; template struct ifthenelse_3; template struct ifthenelse_3{ typedef typename candidates::f0 p; struct Primes_primes_sieve__fun_{ template struct app{ typedef I<((I<((n::val) % (p::val))>::val) != (I<0>::val))> res; }; }; typedef Cons::res::template app::res>::res> res; }; template struct ifthenelse_3{ typedef I<0> res; }; struct filter; struct filter{ template struct app{ struct res{ template struct app{ typedef typename 
ifthenelse_::res res; }; }; }; }; struct init; struct init{ template struct app{ struct res{ template struct app{ struct res{ template struct app{ typedef typename ifthenelse_2 (last::val))>::nonzero>::res res; }; }; }; }; }; }; typedef Cons List; struct primes{ template struct app{ struct sieve; struct sieve{ template struct app{ typedef typename ifthenelse_3::res res; }; }; struct Primes_primes__fun_{ template struct app{ typedef i res; }; }; typedef typename sieve::template app>::res::template app::res::template app::res>::res res; }; }; struct main{ template struct app{ typedef typename primes::template app::res res; }; }; typedef typename main::template app>::res output; typedef typename output::print print;#ifndef limit #error "Parameter limit missing" #include #endif template struct Cons; template struct Cons_; template struct I{ static constexpr int tag = 1000; static constexpr bool nonzero = ((n) != (0)); static constexpr int val = n; }; struct EXCEPTION{ }; template struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template struct ifthenelse; template struct ifthenelse{ typedef Cons::res::template app::res> res; }; template struct ifthenelse{ typedef typename filter::template app::res::template app::res res; }; template struct ifthenelse_; template struct ifthenelse_{ typedef typename param::f1 l; typedef typename param::f0 x; typedef typename 
ifthenelse::res::nonzero>::res res; }; template struct ifthenelse_{ typedef I<0> res; }; template struct ifthenelse_2; template struct ifthenelse_2{ typedef I<0> res; }; template struct ifthenelse_2{ typedef Cons::res, typename init::template app::val))>>::res::template app::res::template app::res> res; }; template struct ifthenelse_3; template struct ifthenelse_3{ typedef typename candidates::f0 p; struct Primes_primes_sieve__fun_{ template struct app{ typedef I<((I<((n::val) % (p::val))>::val) != (I<0>::val))> res; }; }; typedef Cons::res::template app::res>::res> res; }; template struct ifthenelse_3{ typedef I<0> res; }; struct filter; struct filter{ template struct app{ struct res{ template struct app{ typedef typename ifthenelse_::res res; }; }; }; }; struct init; struct init{ template struct app{ struct res{ template struct app{ struct res{ template struct app{ typedef typename ifthenelse_2 (last::val))>::nonzero>::res res; }; }; }; }; }; }; typedef Cons List; struct primes{ template struct app{ struct sieve; struct sieve{ template struct app{ typedef typename ifthenelse_3::res res; }; }; struct Primes_primes__fun_{ template struct app{ typedef i res; }; }; typedef typename sieve::template app>::res::template app::res::template app::res>::res res; }; }; struct main{ template struct app{ typedef typename primes::template app::res res; }; }; typedef typename main::template app>::res output; typedef typename output::print print;C++ is a purely functional language, with no support for mutable state. Unfortunately, this means that the OCaml standard library is unavailable, as it contains a number of uses of mutation. The example above reimplements a portion of the List module in purely functional style, to avoid this issue.
To run a C++ program, you'll need a C++ interpreter. Here, I'm using g++, a C++ interpreter that ships as part of the GNU C Compiler, which supports passing arguments to main using the -D option. Running the program with -Dlimit=100 prints the prime numbers below 100:
$ g++ -Dlimit=100 primes.cpp primes.cpp:159:26: error: ‘print’ in ‘output’ {aka ‘struct Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, I<0> >$ g++ -Dlimit=100 primes.cpp primes.cpp:159:26: error: ‘print’ in ‘output’ {aka ‘struct Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, I<0> >’} does not name a type 159 | typedef typename output::print print; | ^~~~~`
If you haven't written much C++ before, the output format here might strike you as unusual. Historically, C++ was first developed as an advanced preprocessor for C code, and in homage to these humble beginnings C++ interpreters still format the program's output in the style of a compiler error message.
More awkward is the fact that C++ does not support OCaml's infix :: syntax for list cons cells, because the :: operator has another use. So you'll have to read the output as explicitly nested Cons cells instead.
C++ can struggle somewhat on larger or longer-running computations. Support for larger programs is in fact disabled by default, but can be enabled by passing the -ftemplate-depth=999999 option:
$ g++ -ftemplate-depth=999999 -Dlimit=10000 primes.cpp
On my machine, this prints the prime numbers up to 10000 in about half a minute, consuming approximately 11 GiB of memory.
Performance can vary significantly between C++ implementations. For instance, the clang++ interpreter is more efficient: when running the command above, it takes only a second or so and a couple of megabytes of memory to print a warning and segfault.
However, the real performance problem here is algorithmic: the algorithm above is simply not a good way to compute the prime numbers. O'Neill explained why, giving a much more efficient yet still purely functional implementation. Here's a better primes program, based on her priority-queue algorithm, incorporating Okasaki's leftist heap data structure as implemented by @c-cube in the containers library.
Using these more sophisticated data structures, g++ is able to compute the prime numbers below 10000 in only 8 seconds, using a modest 3.1 GiB of memory.
Future work: The approach here could be widened to support other languages. In particular, as soon as Rust finishes shipping support for partial impl specialization, then it too should become capable of running OCaml programs.
Sign in to highlight and annotate this article

Conversation starters
Daily AI Digest
Get the top 5 AI stories delivered to your inbox every morning.
More about
githubtrunk/903193372bfbe50205ce21ee9808cbda81d480b6: [MPS] Fix hi/lo swap typo in Metal Philox RNG single_round (#179227)
mulhilo returns (hi, lo) as (.x, .y), but single_round was using .y (lo) in the XOR position and .x (hi) as the passthrough — the reverse of the standard Philox Feistel structure. This caused c10::metal::randn to produce biased normal samples (mean ≈ -0.076 instead of 0.0). Add a test for c10::metal::randn in TestMetalLibrary that, for a given seed, computes the mean and std and makes sure they fall within certain tolerance levels. CUDA implementation of the same algorithm for reference: https://github.com/pytorch/pytorch/blob/7231118db31/aten/src/ATen/core/PhiloxRNGEngine.h#L202-L213 Found with the help of Claude while debugging bias in the torch.distributions.Gamma implementation. Pull Request resolved: #179227 Approved by: https://github.com/Skylion007
viable/strict/1775228618: [DTensor] redistribute from/to _StridedShard through Replicate (#179059)
Why care about redistributing from/to _StridedShard? As I was fixing _StridedShard.full_tensor(), I found that cartesian_prod goes through _view_ops.py to generate _StridedShard, because of its decomposition to meshgrid → flatten → stack. It triggers _StridedShard-to-Shard redistribution and ends up with a runtime error. This PR proposes redistributing from/to _StridedShard through Replicate. It's not optimal but it ensures correctness. @zpcore might have a more efficient solution. Repro for cartesian_prod: import torch import torch.distributed as dist from torch.distributed.tensor import DTensor, Shard, Replicate, init_device_mesh import os dist.init_process_group(backend="gloo") rank = dist.get_rank() mesh = init_device_mesh("cpu", (2,)) # Reference result on full tensors a_full = torch.tensor([1, 2, 3, 4

GPT-5.1 Codex, GPT-5.1-Codex-Max, and GPT-5.1-Codex-Mini deprecated
We have deprecated the following models across all GitHub Copilot experiences (including Copilot Chat, inline edits, ask and agent modes, and code completions) on April 1, 2026. Model Deprecation date The post GPT-5.1 Codex, GPT-5.1-Codex-Max, and GPT-5.1-Codex-Mini deprecated appeared first on The GitHub Blog.
Knowledge Map
Connected Articles — Knowledge Graph
This article is connected to other articles through shared AI topics and tags.
More in Open Source AI
v4.3.2
Changes Gemma 4 support with full tool-calling in the API and UI. 🆕 ik_llama.cpp support: Add ik_llama.cpp as a new backend through new textgen-portable-ik portable builds and a new --ik flag for full installs. ik_llama.cpp is a fork by the author of the imatrix quants, including support for new quant types, significantly more accurate KV cache quantization (via Hadamard KV cache rotation, enabled by default), and optimizations for MoE models and CPU inference. API: Add echo + logprobs for /v1/completions. The completions endpoint now supports the echo and logprobs parameters, returning token-level log probabilities for both prompt and generated tokens. Token IDs are also included in the output via a new top_logprobs_ids field. Further optimize my custom gradio fork, saving up to 50 ms

How to Run Local AI Agents on Consumer‑Grade Hardware: A Practical Guide
How to Run Local AI Agents on Consumer‑Grade Hardware: A Practical Guide Want to run powerful AI agents without the endless API bills of cloud services? The good news is you don’t need a data‑center‑grade workstation. A single modern consumer GPU is enough to host capable 9B‑parameter models like qwen3.5:9b, giving you private, low‑latency inference at a fraction of the cost. This article walks you through the exact hardware specs, VRAM needs, software installation steps, and budget‑friendly upgrade paths so you can get a local agent up and running today—no PhD required. Why a Consumer GPU Is Enough It’s a common myth that you must buy a professional‑grade card (think RTX A6000 or multiple GPUs linked via NVLink) to run LLMs locally. In reality, for 9B‑class models the sweet spot lies in t

Discussion
Sign in to join the discussion
No comments yet — be the first to share your thoughts!