Regex & text processing: split, match, replace

One small task in all seven languages: take the string "alice=30, bob=25, carol=35", split it into key=value entries, match each with a named-capture regex to print "alice is 30; bob is 25; carol is 35", then replace every name=age with age:name to get "30:alice, 25:bob, 35:carol". Watch where the regex lives — a first-class literal woven into the language (guji, Perl, Raku), a compiled object from a library (Rust, Python, OCaml), or the standard library's regexp package (Go). Notice especially how each names its captures ((?<name>…) vs (?P<name>…) vs Raku's $<name>=(…)) and how the replacement template refers back to them.

guji

# Entry point for the regex demo. Prints the per-entry summary
# ("alice is 30; bob is 25; carol is 35") followed by the age:name rewrite
# ("30:alice, 25:bob, 35:carol"), then evaluates to 0.
sub main(): Int {
    $text = "alice=30, bob=25, carol=35"

    # Split on ", " and describe each entry through the named captures.
    # `~~` yields Option[Match], so the None arm passes an entry that does
    # not match through unchanged. Each named capture is itself Option[Str],
    # which is why '?' is supplied as the fallback via unwrap_or.
    @people = $text.split(", ").map(sub($p) {
        match $p ~~ /(?<name>\w+)=(?<age>\d+)/ {
            Some($m) { "{ $m<name>.unwrap_or('?') } is { $m<age>.unwrap_or('?') }" }
            None     { $p }
        }
    })
    print(@people.join("; "))

    # Swap each name=age into age:name via a template. The template is
    # single-quoted so the $<a> / $<n> group tokens reach replace intact
    # instead of being consumed by string interpolation.
    print($text.replace(/(?<n>\w+)=(?<a>\d+)/, '$<a>:$<n>'))
    0
}

# Entry point for the regex demo. Prints the per-entry summary
# ("alice is 30; bob is 25; carol is 35") followed by the age:name rewrite
# ("30:alice, 25:bob, 35:carol"), then evaluates to 0.
sub main(): Int {
    $text = "alice=30, bob=25, carol=35"

    # Split on ", " and describe each entry through the named captures.
    # `~~` yields Option[Match], so the None arm passes an entry that does
    # not match through unchanged. Each named capture is itself Option[Str],
    # which is why '?' is supplied as the fallback via unwrap_or.
    @people = $text.split(", ").map(sub($p) {
        match $p ~~ /(?<name>\w+)=(?<age>\d+)/ {
            Some($m) { "{ $m<name>.unwrap_or('?') } is { $m<age>.unwrap_or('?') }" }
            None     { $p }
        }
    })
    print(@people.join("; "))

    # Swap each name=age into age:name via a template. The template is
    # single-quoted so the $<a> / $<n> group tokens reach replace intact
    # instead of being consumed by string interpolation.
    print($text.replace(/(?<n>\w+)=(?<a>\d+)/, '$<a>:$<n>'))
    0
}

Regex is a first-class type in guji (§13): the literal /(?<name>\w+)=(?<age>\d+)/ needs no library, and the ~~ operator yields Option[Match] so a non-match is a None you must handle. Each named capture is itself Option[Str], hence .unwrap_or('?'). split, replace, and map all follow the data-first convention (§7.2) so they chain with .; the replace template uses single quotes so its $<a> / $<n> group tokens (§15.3) are not eaten by string interpolation.

Go

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// main prints a "NAME is AGE" summary of every entry in the sample text,
// followed by the same text with each name=age pair rewritten as age:name.
func main() {
	const text = "alice=30, bob=25, carol=35"
	re := regexp.MustCompile(`(?P<name>\w+)=(?P<age>\d+)`)

	entries := strings.Split(text, ", ")
	people := make([]string, 0, len(entries))
	for _, entry := range entries {
		// m[0] is the whole match; m[1] and m[2] are the name and age groups.
		m := re.FindStringSubmatch(entry)
		if m == nil {
			// Not a name=age entry: keep it exactly as it was.
			people = append(people, entry)
			continue
		}
		people = append(people, fmt.Sprintf("%s is %s", m[1], m[2]))
	}
	fmt.Println(strings.Join(people, "; "))

	// The template refers to the capture groups by name.
	fmt.Println(re.ReplaceAllString(text, "$age:$name"))
}

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// main prints a "NAME is AGE" summary of every entry in the sample text,
// followed by the same text with each name=age pair rewritten as age:name.
func main() {
	const text = "alice=30, bob=25, carol=35"
	re := regexp.MustCompile(`(?P<name>\w+)=(?P<age>\d+)`)

	entries := strings.Split(text, ", ")
	people := make([]string, 0, len(entries))
	for _, entry := range entries {
		// m[0] is the whole match; m[1] and m[2] are the name and age groups.
		m := re.FindStringSubmatch(entry)
		if m == nil {
			// Not a name=age entry: keep it exactly as it was.
			people = append(people, entry)
			continue
		}
		people = append(people, fmt.Sprintf("%s is %s", m[1], m[2]))
	}
	fmt.Println(strings.Join(people, "; "))

	// The template refers to the capture groups by name.
	fmt.Println(re.ReplaceAllString(text, "$age:$name"))
}

Go keeps regex in the standard library's regexp package (RE2 syntax — linear-time, no backreferences) and plain string splitting in strings. MustCompile panics on a bad pattern at startup, the idiom for a constant regex. Named groups use the (?P<name>…) form; FindStringSubmatch returns a []string indexed positionally (m[0] is the whole match), while the ReplaceAllString template refers to them by name with $age / $name.

OCaml

(* Print the per-entry summary, then the age:name rewrite of the text. *)
let () =
  let text = "alice=30, bob=25, carol=35" in
  let re = Re.compile (Re.Pcre.re {|(?<name>\w+)=(?<age>\d+)|}) in
  (* Entries that do not match are passed through unchanged. *)
  let describe entry =
    match Re.exec_opt re entry with
    | None -> entry
    | Some g -> Re.Group.get g 1 ^ " is " ^ Re.Group.get g 2
  in
  let people =
    List.map
      (fun part -> describe (String.trim part))
      (String.split_on_char ',' text)
  in
  print_endline (String.concat "; " people);
  (* Group 2 is the age and group 1 the name, so this swaps the pair. *)
  let swap g = Re.Group.get g 2 ^ ":" ^ Re.Group.get g 1 in
  print_endline (Re.replace re ~f:swap text)

(* Print the per-entry summary, then the age:name rewrite of the text. *)
let () =
  let text = "alice=30, bob=25, carol=35" in
  let re = Re.compile (Re.Pcre.re {|(?<name>\w+)=(?<age>\d+)|}) in
  (* Entries that do not match are passed through unchanged. *)
  let describe entry =
    match Re.exec_opt re entry with
    | None -> entry
    | Some g -> Re.Group.get g 1 ^ " is " ^ Re.Group.get g 2
  in
  let people =
    List.map
      (fun part -> describe (String.trim part))
      (String.split_on_char ',' text)
  in
  print_endline (String.concat "; " people);
  (* Group 2 is the age and group 1 the name, so this swaps the pair. *)
  let swap g = Re.Group.get g 2 ^ ":" ^ Re.Group.get g 1 in
  print_endline (Re.replace re ~f:swap text)

OCaml has no regex in its standard library, so the idiomatic choice is the Re library, here using its PCRE-flavoured front end Re.Pcre.re then Re.compile. {|...|} is a raw string literal so backslashes need no escaping. Re.exec_opt returns Re.Group.t option, and Re.Group.get g 1 pulls a numbered group; Re.replace ... ~f takes a function from the match groups to the replacement, so the swap is expressed as plain string concatenation rather than a template.

Perl

use strict;
use warnings;

my $text = "alice=30, bob=25, carol=35";

# Break the text into entries first, then describe each one. The bare match
# runs against $_, and %+ holds the named captures of the successful match;
# an entry that does not match is passed through unchanged.
my @entries = split /,\s*/, $text;
my @people  = map { /(?<name>\w+)=(?<age>\d+)/ ? "$+{name} is $+{age}" : $_ } @entries;
print join("; ", @people), "\n";

# Copy first, then substitute on the copy, so $text itself stays intact.
(my $swapped = $text) =~ s/(?<n>\w+)=(?<a>\d+)/$+{a}:$+{n}/g;
print "$swapped\n";

use strict;
use warnings;

my $text = "alice=30, bob=25, carol=35";

# Break the text into entries first, then describe each one. The bare match
# runs against $_, and %+ holds the named captures of the successful match;
# an entry that does not match is passed through unchanged.
my @entries = split /,\s*/, $text;
my @people  = map { /(?<name>\w+)=(?<age>\d+)/ ? "$+{name} is $+{age}" : $_ } @entries;
print join("; ", @people), "\n";

# Copy first, then substitute on the copy, so $text itself stays intact.
(my $swapped = $text) =~ s/(?<n>\w+)=(?<a>\d+)/$+{a}:$+{n}/g;
print "$swapped\n";

Regex is woven into Perl's syntax: m//, s///, and split are operators, not library calls, and a bare /.../ matches against the topic $_ that map sets for each element. Named captures land in the magic %+ hash, read as $+{name}. The substitution copies first — (my $swapped = $text) =~ s/.../.../g — so the original string is left intact, and the /g flag rewrites every entry.

Raku

my $text = "alice=30, bob=25, carol=35";

# Describe each entry; the smartmatch fills $/, which $<name> and $<age> read.
# An entry that does not match is passed through unchanged.
my @people = $text.split(", ").map: -> $entry {
    $entry ~~ / $<name>=(\w+) '=' $<age>=(\d+) /
        ?? "$<name> is $<age>"
        !! $entry
};
say join("; ", @people);

# Non-mutating global substitution; the block computes each replacement
# from the captures of the current match.
my $pair = / $<n>=(\w+) '=' $<a>=(\d+) /;
say $text.subst($pair, { "$<a>:$<n>" }, :g);

my $text = "alice=30, bob=25, carol=35";

# Describe each entry; the smartmatch fills $/, which $<name> and $<age> read.
# An entry that does not match is passed through unchanged.
my @people = $text.split(", ").map: -> $entry {
    $entry ~~ / $<name>=(\w+) '=' $<age>=(\d+) /
        ?? "$<name> is $<age>"
        !! $entry
};
say join("; ", @people);

# Non-mutating global substitution; the block computes each replacement
# from the captures of the current match.
my $pair = / $<n>=(\w+) '=' $<a>=(\d+) /;
say $text.subst($pair, { "$<a>:$<n>" }, :g);

Raku promotes regexes to full grammar objects; the ~~ smartmatch sets the match variable $/, and named captures are reached as $<name>. Inside a regex, whitespace is insignificant and every non-alphanumeric character is reserved as syntax, so literal text such as = must be quoted ('=') or backslash-escaped; $<name>=(...) is the named-capture form. .subst is the non-mutating replace; passing a { … } block as the replacement lets you compute $<a>:$<n> from each match, with :g for all occurrences.

Rust

use regex::Regex;

/// Prints a `NAME is AGE` summary of every entry in the sample text, then the
/// same text with each `name=age` pair rewritten as `age:name`.
fn main() {
    let text = "alice=30, bob=25, carol=35";
    let re = Regex::new(r"(?<name>\w+)=(?<age>\d+)").unwrap();

    // Entries that do not match are passed through unchanged. Indexing by
    // group name cannot panic here: both groups take part in every match.
    let describe = |entry: &str| -> String {
        re.captures(entry).map_or_else(
            || entry.to_string(),
            |c| format!("{} is {}", &c["name"], &c["age"]),
        )
    };
    let people: Vec<String> = text.split(", ").map(describe).collect();
    println!("{}", people.join("; "));

    // The template interpolates the named groups.
    let swapped = re.replace_all(text, "$age:$name");
    println!("{}", swapped);
}

use regex::Regex;

/// Prints a `NAME is AGE` summary of every entry in the sample text, then the
/// same text with each `name=age` pair rewritten as `age:name`.
fn main() {
    let text = "alice=30, bob=25, carol=35";
    let re = Regex::new(r"(?<name>\w+)=(?<age>\d+)").unwrap();

    // Entries that do not match are passed through unchanged. Indexing by
    // group name cannot panic here: both groups take part in every match.
    let describe = |entry: &str| -> String {
        re.captures(entry).map_or_else(
            || entry.to_string(),
            |c| format!("{} is {}", &c["name"], &c["age"]),
        )
    };
    let people: Vec<String> = text.split(", ").map(describe).collect();
    println!("{}", people.join("; "));

    // The template interpolates the named groups.
    let swapped = re.replace_all(text, "$age:$name");
    println!("{}", swapped);
}

Rust's regex lives in the external regex crate (RE2-style, guaranteed linear time). Regex::new returns a Result, so .unwrap() asserts the literal compiles. captures gives an Option<Captures>; indexing &c["name"] panics if the group is absent, which is safe here because the match succeeded. replace_all takes a template string where $age / $name interpolate the named groups, and returns a Cow<str> so an unchanged string needn't be reallocated.

Python

import re

text = "alice=30, bob=25, carol=35"
pat = re.compile(r"(?P<name>\w+)=(?P<age>\d+)")


def _describe(entry):
    """Return "<name> is <age>" for a name=age entry, or the entry unchanged."""
    m = pat.match(entry)
    if m is None:
        return entry
    return f"{m['name']} is {m['age']}"


# One description per ", "-separated entry, in the original order.
people = [_describe(entry) for entry in text.split(", ")]
print("; ".join(people))

# \g<a> and \g<n> refer back to the named groups of the substitution pattern.
print(re.sub(r"(?P<n>\w+)=(?P<a>\d+)", r"\g<a>:\g<n>", text))

import re

text = "alice=30, bob=25, carol=35"
pat = re.compile(r"(?P<name>\w+)=(?P<age>\d+)")


def _describe(entry):
    """Return "<name> is <age>" for a name=age entry, or the entry unchanged."""
    m = pat.match(entry)
    if m is None:
        return entry
    return f"{m['name']} is {m['age']}"


# One description per ", "-separated entry, in the original order.
people = [_describe(entry) for entry in text.split(", ")]
print("; ".join(people))

# \g<a> and \g<n> refer back to the named groups of the substitution pattern.
print(re.sub(r"(?P<n>\w+)=(?P<a>\d+)", r"\g<a>:\g<n>", text))

Python's re module is standard but explicit: you compile a pattern (or call re.sub directly) rather than writing a regex literal. Named groups use the (?P<name>…) syntax; a successful match returns a Match object indexable by group name, m['name']. In a re.sub replacement, named back-references are written \g<name>, and the r"..." raw strings keep the backslashes literal in both the pattern and the template.