A new C++ back end for ocamlc

Hacker News Top · by ducminhgd · April 1, 2026 · 6 min read · 1 view

Article URL: https://github.com/ocaml/ocaml/pull/14701 Comments URL: https://news.ycombinator.com/item?id=47608058 Points: 178 # Comments: 15

This patch adds a new C++ backend to ocamlc, improving on the unincremented C currently in use by the runtime and FFI. As an example, here's a simple program that computes the prime numbers up to a user-specified limit:

module List = struct  let rec filter p = function  | [] -> []  | x :: l -> if p x then x :: filter p l else filter p l

module List = struct  let rec filter p = function  | [] -> []  | x :: l -> if p x then x :: filter p l else filter p l

let rec init i last f = if i > last then [] else f i :: init (i+1) last f end

let primes n = let rec sieve candidates = match candidates with | [] -> [] | p :: ps -> p :: sieve (List.filter (fun n -> n mod p <> 0) ps) in sieve (List.init 2 n (fun i -> i))

let main ~limit = primes limit

You can compile this program to C++ using:

ocamlc -incr-c primes.ml

which produces primes.cpp, containing your program translated to idiomatic, readable C++ code:

Generated C++ code in primes.cpp

#ifndef limit #error "Parameter limit missing" #include  #endif template struct Cons; template struct Cons_; template  struct I{ static constexpr int tag = 1000; static constexpr bool nonzero = ((n) != (0)); static constexpr int val = n; }; struct EXCEPTION{ }; template  struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template  struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template  struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template  struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template  struct ifthenelse; template  struct ifthenelse{ typedef Cons::res::template app::res> res; }; template  struct ifthenelse{ typedef typename filter::template app::res::template app::res res; }; template  struct ifthenelse_; template  struct ifthenelse_{ typedef typename param::f1 l; typedef typename param::f0 x; typedef typename ifthenelse::res::nonzero>::res res; }; template  struct ifthenelse_{ typedef I<0> res; }; template  struct ifthenelse_2; template  struct ifthenelse_2{ typedef I<0> res; }; template  struct ifthenelse_2{ typedef Cons::res,  typename init::template app::val))>>::res::template app::res::template app::res> res; }; template  struct ifthenelse_3; template  struct ifthenelse_3{ typedef typename candidates::f0 p; struct Primes_primes_sieve__fun_{ template  struct app{ typedef I<((I<((n::val) % (p::val))>::val) != (I<0>::val))> res; }; }; typedef Cons::res::template app::res>::res> res; }; template  struct ifthenelse_3{ typedef I<0> res; }; struct filter; struct filter{ template  struct app{ struct res{ template  struct app{ 
typedef typename ifthenelse_::res res; }; }; }; }; struct init; struct init{ template  struct app{ struct res{ template  struct app{ struct res{ template  struct app{ typedef typename ifthenelse_2 (last::val))>::nonzero>::res res; }; }; }; }; }; }; typedef Cons List; struct primes{ template  struct app{ struct sieve; struct sieve{ template  struct app{ typedef typename ifthenelse_3::res res; }; }; struct Primes_primes__fun_{ template  struct app{ typedef i res; }; }; typedef typename sieve::template app>::res::template app::res::template app::res>::res res; }; }; struct main{ template  struct app{ typedef typename primes::template app::res res; }; }; typedef typename main::template app>::res output; typedef typename output::print print;

#ifndef limit #error "Parameter limit missing" #include  #endif template struct Cons; template struct Cons_; template  struct I{ static constexpr int tag = 1000; static constexpr bool nonzero = ((n) != (0)); static constexpr int val = n; }; struct EXCEPTION{ }; template  struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template  struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; }; template  struct Cons{ static constexpr int tag = 0; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template  struct Cons_{ static constexpr int tag = tag_; static constexpr bool nonzero = true; static constexpr int val = -1; typedef f0_ f0; typedef f1_ f1; typedef f2_ f2; }; template  struct ifthenelse; template  struct ifthenelse{ typedef Cons::res::template app::res> res; }; template  struct ifthenelse{ typedef typename filter::template app::res::template app::res res; }; template  struct ifthenelse_; template  struct ifthenelse_{ typedef typename param::f1 l; typedef typename param::f0 x; typedef typename ifthenelse::res::nonzero>::res res; }; template  struct ifthenelse_{ typedef I<0> res; }; template  struct ifthenelse_2; template  struct ifthenelse_2{ typedef I<0> res; }; template  struct ifthenelse_2{ typedef Cons::res,  typename init::template app::val))>>::res::template app::res::template app::res> res; }; template  struct ifthenelse_3; template  struct ifthenelse_3{ typedef typename candidates::f0 p; struct Primes_primes_sieve__fun_{ template  struct app{ typedef I<((I<((n::val) % (p::val))>::val) != (I<0>::val))> res; }; }; typedef Cons::res::template app::res>::res> res; }; template  struct ifthenelse_3{ typedef I<0> res; }; struct filter; struct filter{ template  struct app{ struct res{ template  struct app{ 
typedef typename ifthenelse_::res res; }; }; }; }; struct init; struct init{ template  struct app{ struct res{ template  struct app{ struct res{ template  struct app{ typedef typename ifthenelse_2 (last::val))>::nonzero>::res res; }; }; }; }; }; }; typedef Cons List; struct primes{ template  struct app{ struct sieve; struct sieve{ template  struct app{ typedef typename ifthenelse_3::res res; }; }; struct Primes_primes__fun_{ template  struct app{ typedef i res; }; }; typedef typename sieve::template app>::res::template app::res::template app::res>::res res; }; }; struct main{ template  struct app{ typedef typename primes::template app::res res; }; }; typedef typename main::template app>::res output; typedef typename output::print print;

C++ is a purely functional language, with no support for mutable state. Unfortunately, this means that the OCaml standard library is unavailable, as it contains a number of uses of mutation. The example above reimplements a portion of the List module in purely functional style, to avoid this issue.

To run a C++ program, you'll need a C++ interpreter. Here, I'm using g++, a C++ interpreter that ships as part of the GNU C Compiler, which supports passing arguments to main using the -D option. Running the program with -Dlimit=100 prints the prime numbers below 100:

$ g++ -Dlimit=100 primes.cpp primes.cpp:159:26: error: ‘print’ in ‘output’ {aka ‘struct Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, I<0> >

$ g++ -Dlimit=100 primes.cpp primes.cpp:159:26: error: ‘print’ in ‘output’ {aka ‘struct Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, Cons, I<0> >

’} does not name a type 159 | typedef typename output::print print; | ^~~~~

If you haven't written much C++ before, the output format here might strike you as unusual. Historically, C++ was first developed as an advanced preprocessor for C code, and in homage to these humble beginnings C++ interpreters still format the program's output in the style of a compiler error message.

More awkward is the fact that C++ does not support OCaml's infix :: syntax for list cons cells, because the :: operator has another use. So you'll have to read the output as explicitly nested Cons cells instead.

C++ can struggle somewhat on larger or longer-running computations. Support for larger programs is in fact disabled by default, but can be enabled by passing the -ftemplate-depth=999999 option:

$ g++ -ftemplate-depth=999999 -Dlimit=10000 primes.cpp

On my machine, this prints the prime numbers up to 10000 in about half a minute, consuming approximately 11 GiB of memory.

Performance can vary significantly between C++ implementations. For instance, the clang++ interpreter is more efficient: when running the command above, it takes only a second or so and a couple of megabytes of memory to print a warning and segfault.

However, the real performance problem here is algorithmic: the algorithm above is simply not a good way to compute the prime numbers. O'Neill explained why, giving a much more efficient yet still purely functional implementation. Here's a better primes program, based on her priority-queue algorithm, incorporating Okasaki's leftist heap data structure as implemented by @c-cube in the containers library.

Using these more sophisticated data structures, g++ is able to compute the prime numbers below 10000 in only 8 seconds, using a modest 3.1 GiB of memory.

Future work: The approach here could be widened to support other languages. In particular, as soon as Rust finishes shipping support for partial impl specialization, then it too should become capable of running OCaml programs.

Original source

Hacker News Top

https://github.com/ocaml/ocaml/pull/14701

Was this article helpful?

Ask AI about this article

Ready

Conversation starters

Ask anything about this article…

Daily AI Digest

Get the top 5 AI stories delivered to your inbox every morning.

More about

github

ModelsFresh

trunk/903193372bfbe50205ce21ee9808cbda81d480b6: [MPS] Fix hi/lo swap typo in Metal Philox RNG single_round (#179227)

mulhilo returns (hi, lo) as (.x, .y), but single_round was using .y (lo) in the XOR position and .x (hi) as the passthrough — the reverse of the standard Philox Feistel structure. This caused c10::metal::randn to produce biased normal samples (mean ≈ -0.076 instead of 0.0). Add a test for c10::metal::randn in TestMetalLibrary that, for a given seed, computes the mean and std and makes sure they fall within certain tolerance levels. CUDA implementation of the same algorithm for reference: https://github.com/pytorch/pytorch/blob/7231118db31/aten/src/ATen/core/PhiloxRNGEngine.h#L202-L213 Found with the help of Claude while debugging bias in the torch.distributions.Gamma implementation. Pull Request resolved: #179227 Approved by: https://github.com/Skylion007

PyTorch Releases

1mabout 3 hours ago

ProductsFresh

viable/strict/1775228618: [DTensor] redistribute from/to _StridedShard through Replicate (#179059)

Why care about redistributing from/to _StridedShard? As I was fixing _StridedShard.full_tensor(), I found cartesian_prod goes through _view_ops.py to generate _StridedShard, because of its decomposition to meshgrid → flatten → stack. It triggers a _StridedShard-to-Shard redistribution and ends up with a runtime error. This PR proposes redistributing from/to _StridedShard through Replicate. It's not optimal but it ensures correctness. @zpcore might have a more efficient solution. Repro for cartesian_prod: import torch import torch.distributed as dist from torch.distributed.tensor import DTensor, Shard, Replicate, init_device_mesh import os dist.init_process_group(backend="gloo") rank = dist.get_rank() mesh = init_device_mesh("cpu", (2,)) # Reference result on full tensors a_full = torch.tensor([1, 2, 3, 4

PyTorch Releases

1mabout 8 hours ago

ModelsLive

GPT-5.1 Codex, GPT-5.1-Codex-Max, and GPT-5.1-Codex-Mini deprecated

We have deprecated the following models across all GitHub Copilot experiences (including Copilot Chat, inline edits, ask and agent modes, and code completions) on April 1, 2026. Model Deprecation date The post GPT-5.1 Codex, GPT-5.1-Codex-Max, and GPT-5.1-Codex-Mini deprecated appeared first on The GitHub Blog .

GitHub Copilot Changelog

1mabout 1 hour ago

Knowledge Map

TopicsEntitiesSource

Connected Articles — Knowledge Graph

This article is connected to other articles through shared AI topics and tags.

Knowledge Graph100 articles · 182 connections

Scroll to zoom · drag to pan · click to open

Discussion

No comments yet — be the first to share your thoughts!

More in Open Source AI

Open Source AIFresh

Local Gemma 4 with OpenCode & llama.cpp | Build a Local RAG with LangChain | 🔴 Live

AI YouTube Channel 41

1mabout 2 hours ago

Open Source AIFresh

langchain==1.2.15

Changes since langchain==1.2.14 release: langchain v1.2.15 ( #36496 ) chore: bump aiohttp from 3.13.3 to 3.13.4 in /libs/langchain_v1 ( #36438 )

LangChain Releases

1mabout 4 hours ago

Open Source AILive

v4.3.2

Changes Gemma 4 support with full tool-calling in the API and UI. 🆕 ik_llama.cpp support : Add ik_llama.cpp as a new backend through new textgen-portable-ik portable builds and a new --ik flag for full installs. ik_llama.cpp is a fork by the author of the imatrix quants, including support for new quant types, significantly more accurate KV cache quantization (via Hadamard KV cache rotation, enabled by default), and optimizations for MoE models and CPU inference. API: Add echo + logprobs for /v1/completions . The completions endpoint now supports the echo and logprobs parameters, returning token-level log probabilities for both prompt and generated tokens. Token IDs are also included in the output via a new top_logprobs_ids field. Further optimize my custom gradio fork, saving up to 50 ms

text-gen-webui Releases

3mabout 1 hour ago

Open Source AILive

How to Run Local AI Agents on Consumer‑Grade Hardware: A Practical Guide

How to Run Local AI Agents on Consumer‑Grade Hardware: A Practical Guide Want to run powerful AI agents without the endless API bills of cloud services? The good news is you don’t need a data‑center‑grade workstation. A single modern consumer GPU is enough to host capable 9B‑parameter models like qwen3.5:9b, giving you private, low‑latency inference at a fraction of the cost. This article walks you through the exact hardware specs, VRAM needs, software installation steps, and budget‑friendly upgrade paths so you can get a local agent up and running today—no PhD required. Why a Consumer GPU Is Enough It’s a common myth that you must buy a professional‑grade card (think RTX A6000 or multiple GPUs linked via NVLink) to run LLMs locally. In reality, for 9B‑class models the sweet spot lies in t

Dev.to AI

11mabout 2 hours ago