View Javadoc
1   package io.jawk;
2   
3   /*-
4    * ╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲
5    * Jawk
6    * ჻჻჻჻჻჻
7    * Copyright (C) 2006 - 2026 MetricsHub
8    * ჻჻჻჻჻჻
9    * This program is free software: you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation, either version 3 of the
12   * License, or (at your option) any later version.
13   *
14   * This program is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU General Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU General Lesser Public
20   * License along with this program.  If not, see
21   * <http://www.gnu.org/licenses/lgpl-3.0.html>.
22   * ╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱
23   */
24  
25  import java.io.ByteArrayInputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.OutputStream;
29  import java.io.PrintStream;
30  import java.io.Reader;
31  import java.io.StringReader;
32  import java.nio.charset.StandardCharsets;
33  import java.util.ArrayList;
34  import java.util.Arrays;
35  import java.util.Collection;
36  import java.util.Collections;
37  import java.util.LinkedHashMap;
38  import java.util.List;
39  import java.util.Map;
40  import java.util.Objects;
41  import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
42  import io.jawk.backend.AVM;
43  import io.jawk.ext.ExtensionFunction;
44  import io.jawk.ext.ExtensionRegistry;
45  import io.jawk.ext.JawkExtension;
46  import io.jawk.frontend.AwkParser;
47  import io.jawk.frontend.AstNode;
48  import io.jawk.jrt.AppendableAwkSink;
49  import io.jawk.jrt.AwkSink;
50  import io.jawk.jrt.InputSource;
51  import io.jawk.jrt.OutputStreamAwkSink;
52  import io.jawk.jrt.StreamInputSource;
53  import io.jawk.util.AwkSettings;
54  import io.jawk.util.ScriptSource;
55  
56  /**
57   * Entry point into the parsing, analysis, and execution
58   * of a Jawk script.
59   * This entry point is used both when Jawk is executed as a library and when
60   * invoked from the command line.
61   * <p>
62   * The overall process to execute a Jawk script is as follows:
63   * <ul>
64   * <li>Parse the Jawk script, producing an abstract syntax tree.
65   * <li>Traverse the abstract syntax tree, producing a list of
66   * instruction tuples for the interpreter.
67   * <li>Traverse the list of tuples, providing a runtime which
68   * ultimately executes the Jawk script, <strong>or</strong>
69   * Command-line parameters dictate which action is to take place.
70   * </ul>
71   * Two additional semantic checks on the syntax tree are employed
72   * (both to resolve function calls for defined functions).
73   * As a result, the syntax tree is traversed three times.
 * The number of times the tuples are traversed depends
 * on whether interpretation or compilation takes place.
76   * <p>
77   * The engine does not enable any extensions automatically. Extensions can be
78   * provided programmatically via the {@link Awk#Awk(Collection)} constructors or
79   * via the command line when using the CLI entry point.
80   *
81   * @see io.jawk.backend.AVM
82   * @author Danny Daglas
83   */
84  public class Awk {
85  
	/** POSIX default field separator: a single space ({@code " "}). */
	public static final String DEFAULT_FS = " ";

	/** POSIX default record separator: a newline ({@code "\n"}). */
	public static final String DEFAULT_RS = "\n";

	/** POSIX default output field separator: a single space ({@code " "}). */
	public static final String DEFAULT_OFS = " ";

	/** POSIX default output record separator: a newline ({@code "\n"}). */
	public static final String DEFAULT_ORS = "\n";

	/** POSIX default number-to-string conversion format ({@code "%.6g"}). */
	public static final String DEFAULT_CONVFMT = "%.6g";

	/** POSIX default output number format ({@code "%.6g"}). */
	public static final String DEFAULT_OFMT = "%.6g";

	/**
	 * POSIX default subscript separator ({@code "\034"}): the single character
	 * with code 28 (octal 034).
	 */
	public static final String DEFAULT_SUBSEP = String.valueOf((char) 28);
106 
	/**
	 * Extension functions handed to the parser, keyed by the keyword under
	 * which each function is registered.
	 */
	private final Map<String, ExtensionFunction> extensionFunctions;

	/**
	 * Extension instances handed to the {@link AVM}, keyed by the class name
	 * ({@link Class#getName()}) of each extension.
	 */
	private final Map<String, JawkExtension> extensionInstances;

	/**
	 * The behavioral settings used by this engine instance. Never {@code null}.
	 */
	private final AwkSettings settings;

	/**
	 * The last parsed {@link AstNode} produced during program compilation.
	 * Reset to {@code null} at the start of each program compilation.
	 */
	private AstNode lastAst;
120 
121 	/**
122 	 * Create a new instance of Awk without extensions.
123 	 */
124 	public Awk() {
125 		this(new AwkSettings());
126 	}
127 
	/**
	 * Create a new instance of Awk with the specified settings and without
	 * extensions.
	 *
	 * @param settings behavioral configuration for this engine, must not be
	 *        {@code null}
	 * @throws NullPointerException if {@code settings} is {@code null}
	 */
	public Awk(AwkSettings settings) {
		this(ExtensionSetup.EMPTY, settings);
	}
136 
137 	/**
138 	 * Create a new instance of Awk with the specified extension instances.
139 	 *
140 	 * @param extensions extension instances implementing {@link JawkExtension}
141 	 */
142 	public Awk(Collection<? extends JawkExtension> extensions) {
143 		this(createExtensionSetup(extensions));
144 	}
145 
	/**
	 * Create a new instance of Awk with the specified extension instances
	 * and settings.
	 *
	 * @param extensions extension instances implementing {@link JawkExtension};
	 *        {@code null} or an empty collection means no extensions
	 * @param settings behavioral configuration for this engine, must not be
	 *        {@code null}
	 * @throws IllegalArgumentException if an extension is {@code null}, if the
	 *         same extension class is provided more than once, or if two
	 *         extensions provide the same keyword
	 * @throws NullPointerException if {@code settings} is {@code null}
	 */
	public Awk(Collection<? extends JawkExtension> extensions, AwkSettings settings) {
		this(createExtensionSetup(extensions), settings);
	}
156 
157 	/**
158 	 * Create a new instance of Awk with the specified extension instances.
159 	 *
160 	 * @param extensions extension instances implementing {@link JawkExtension}
161 	 */
162 	@SafeVarargs
163 	public Awk(JawkExtension... extensions) {
164 		this(createExtensionSetup(Arrays.asList(extensions)));
165 	}
166 
	/**
	 * Creates an engine from an already validated extension setup, using a
	 * freshly created {@link AwkSettings} instance.
	 *
	 * @param setup extension lookup tables to use
	 */
	protected Awk(ExtensionSetup setup) {
		this(setup, new AwkSettings());
	}
170 
	/**
	 * Creates an engine from an already validated extension setup and the
	 * given settings. All other constructors end up here.
	 *
	 * @param setup extension lookup tables to use
	 * @param settings behavioral configuration for this engine, must not be
	 *        {@code null}
	 * @throws NullPointerException if {@code setup} or {@code settings} is
	 *         {@code null}
	 */
	protected Awk(ExtensionSetup setup, AwkSettings settings) {
		this.extensionFunctions = setup.functions;
		this.extensionInstances = setup.instances;
		this.settings = Objects.requireNonNull(settings, "settings");
	}
176 
	/**
	 * Returns the extension functions known to this engine.
	 *
	 * @return map of extension keyword to {@link ExtensionFunction}; the same
	 *         map instance this engine passes to the parser
	 */
	protected Map<String, ExtensionFunction> getExtensionFunctions() {
		return extensionFunctions;
	}
180 
	/**
	 * Returns the extension instances known to this engine.
	 *
	 * @return map of extension class name to {@link JawkExtension} instance;
	 *         the same map instance this engine passes to each {@link AVM}
	 */
	protected Map<String, JawkExtension> getExtensionInstances() {
		return extensionInstances;
	}
184 
	/**
	 * Returns the behavioral settings associated with this engine instance.
	 * <p>
	 * The returned object is the instance held by this engine, not a copy.
	 * </p>
	 *
	 * @return the {@link AwkSettings} used by this instance, never {@code null}
	 */
	@SuppressFBWarnings("EI_EXPOSE_REP")
	public AwkSettings getSettings() {
		return settings;
	}
194 
195 	static Map<String, ExtensionFunction> createExtensionFunctionMap(Collection<? extends JawkExtension> extensions) {
196 		return createExtensionSetup(extensions).functions;
197 	}
198 
199 	static Map<String, JawkExtension> createExtensionInstanceMap(Collection<? extends JawkExtension> extensions) {
200 		return createExtensionSetup(extensions).instances;
201 	}
202 
203 	static Map<String, ExtensionFunction> createExtensionFunctionMap(JawkExtension... extensions) {
204 		if (extensions == null || extensions.length == 0) {
205 			return ExtensionSetup.EMPTY.functions;
206 		}
207 		return createExtensionFunctionMap(Arrays.asList(extensions));
208 	}
209 
210 	static Map<String, JawkExtension> createExtensionInstanceMap(JawkExtension... extensions) {
211 		if (extensions == null || extensions.length == 0) {
212 			return ExtensionSetup.EMPTY.instances;
213 		}
214 		return createExtensionInstanceMap(Arrays.asList(extensions));
215 	}
216 
217 	private static ExtensionSetup createExtensionSetup(Collection<? extends JawkExtension> extensions) {
218 		if (extensions == null || extensions.isEmpty()) {
219 			return ExtensionSetup.EMPTY;
220 		}
221 		Map<String, ExtensionFunction> keywordMap = new LinkedHashMap<String, ExtensionFunction>();
222 		Map<String, JawkExtension> instanceMap = new LinkedHashMap<String, JawkExtension>();
223 		for (JawkExtension extension : extensions) {
224 			if (extension == null) {
225 				throw new IllegalArgumentException("Extension instance must not be null");
226 			}
227 			String className = extension.getClass().getName();
228 			JawkExtension previousInstance = instanceMap.putIfAbsent(className, extension);
229 			if (previousInstance != null) {
230 				throw new IllegalArgumentException(
231 						"Extension class '" + className + "' was provided multiple times");
232 			}
233 			for (Map.Entry<String, ExtensionFunction> entry : extension.getExtensionFunctions().entrySet()) {
234 				String keyword = entry.getKey();
235 				ExtensionFunction previous = keywordMap.putIfAbsent(keyword, entry.getValue());
236 				if (previous != null) {
237 					throw new IllegalArgumentException(
238 							"Keyword '" + keyword + "' already provided by another extension");
239 				}
240 			}
241 		}
242 		return new ExtensionSetup(
243 				Collections.unmodifiableMap(keywordMap),
244 				Collections.unmodifiableMap(instanceMap));
245 	}
246 
247 	private static final class ExtensionSetup {
248 
249 		private static final ExtensionSetup EMPTY = new ExtensionSetup(
250 				Collections.<String, ExtensionFunction>emptyMap(),
251 				Collections.<String, JawkExtension>emptyMap());
252 
253 		private final Map<String, ExtensionFunction> functions;
254 		private final Map<String, JawkExtension> instances;
255 
256 		private ExtensionSetup(Map<String, ExtensionFunction> functionsParam,
257 				Map<String, JawkExtension> instancesParam) {
258 			this.functions = functionsParam;
259 			this.instances = instancesParam;
260 		}
261 	}
262 
	/**
	 * Returns the last parsed AST produced by the most recent program compilation.
	 * <p>
	 * The returned node is the instance held by this engine, not a copy.
	 * </p>
	 *
	 * @return the last {@link AstNode}, or {@code null} if no compilation occurred
	 */
	@SuppressFBWarnings("EI_EXPOSE_REP")
	public AstNode getLastAst() {
		return lastAst;
	}
272 
	/**
	 * Final empty finalizer to mitigate finalizer attacks flagged by SpotBugs.
	 * This prevents subclasses from introducing a finalizer that could run on a
	 * partially constructed instance if a constructor throws.
	 * <p>
	 * The method is declared {@code final} so that it cannot be overridden, and
	 * it intentionally does nothing.
	 * </p>
	 */
	@SuppressWarnings("deprecation")
	@Override
	protected final void finalize() { /* no-op */ }
281 
282 	/**
283 	 * Compiles a full AWK program.
284 	 *
285 	 * @param script AWK program source
286 	 * @return compiled immutable program
287 	 * @throws IOException if compilation fails
288 	 */
289 	public AwkProgram compile(String script) throws IOException {
290 		return compile(script, false);
291 	}
292 
293 	/**
294 	 * Compiles a full AWK program.
295 	 *
296 	 * @param script AWK program source
297 	 * @return compiled immutable program
298 	 * @throws IOException if compilation fails
299 	 */
300 	public AwkProgram compile(Reader script) throws IOException {
301 		return compile(script, false);
302 	}
303 
	/**
	 * Creates a reusable runtime backed by one {@link AVM} instance, using the
	 * settings of this engine.
	 *
	 * @return reusable AVM
	 */
	public AVM createAvm() {
		return createAvm(this.settings);
	}
312 
	/**
	 * Creates a reusable runtime backed by one {@link AVM} instance, optionally
	 * collecting runtime profiling statistics. The settings of this engine are
	 * used.
	 *
	 * @param profilingEnabled whether runtime profiling should be enabled
	 * @return reusable AVM
	 */
	public AVM createAvm(boolean profilingEnabled) {
		return createAvm(this.settings, profilingEnabled);
	}
323 
324 	/**
325 	 * Starts building a run request for a compiled AWK program.
326 	 * <p>
327 	 * Use the returned {@link AwkRunBuilder} to configure input, arguments,
328 	 * variables, and output, then call one of the terminal methods to execute.
329 	 * </p>
330 	 *
331 	 * <pre>{@code
332 	 * awk.script(program).input(stream).execute(mySink);
333 	 * String out = awk.script(program).input("hello").execute();
334 	 * }</pre>
335 	 *
336 	 * @param program compiled program to execute
337 	 * @return a builder for configuring and executing the run
338 	 */
339 	public AwkRunBuilder script(AwkProgram program) {
340 		return new AwkRunBuilder(Objects.requireNonNull(program, "program"));
341 	}
342 
343 	/**
344 	 * Starts building a run request from an AWK script string.
345 	 * <p>
346 	 * The script is compiled and executed when a terminal method is called.
347 	 * Additional scripts can be appended by calling {@link AwkRunBuilder#script(String)}
348 	 * on the returned builder.
349 	 * </p>
350 	 *
351 	 * <pre>{@code
352 	 * String result = awk.script("{ print toupper($0) }").input("hello").execute();
353 	 * }</pre>
354 	 *
355 	 * @param scriptText AWK program source
356 	 * @return a builder for configuring and executing the run
357 	 */
358 	public AwkRunBuilder script(String scriptText) {
359 		return new AwkRunBuilder().script(Objects.requireNonNull(scriptText, "script"));
360 	}
361 
362 	/**
363 	 * Evaluates a compiled expression using a fresh isolated runtime.
364 	 *
365 	 * @param expression compiled expression
366 	 * @return evaluated value
367 	 * @throws IOException if evaluation fails
368 	 */
369 	public Object eval(AwkExpression expression) throws IOException {
370 		AwkExpression compiledExpression = Objects.requireNonNull(expression, "expression");
371 		try (AVM activeEvalAvm = createAvm(settings)) {
372 			return activeEvalAvm.eval(compiledExpression, new SingleRecordInputSource(null));
373 		}
374 	}
375 
376 	/**
377 	 * Evaluates a compiled expression against one text record using a fresh
378 	 * isolated runtime.
379 	 *
380 	 * @param expression compiled expression
381 	 * @param input record exposed as {@code $0}
382 	 * @return evaluated value
383 	 * @throws IOException if evaluation fails
384 	 */
385 	public Object eval(AwkExpression expression, String input) throws IOException {
386 		AwkExpression compiledExpression = Objects.requireNonNull(expression, "expression");
387 		try (AVM activeEvalAvm = createAvm(settings)) {
388 			return activeEvalAvm.eval(compiledExpression, new SingleRecordInputSource(input));
389 		}
390 	}
391 
392 	/**
393 	 * Evaluates a compiled expression against one structured record source using a
394 	 * fresh isolated runtime.
395 	 *
396 	 * @param expression compiled expression
397 	 * @param source structured record source
398 	 * @return evaluated value
399 	 * @throws IOException if evaluation fails
400 	 */
401 	public Object eval(AwkExpression expression, InputSource source) throws IOException {
402 		AwkExpression compiledExpression = Objects.requireNonNull(expression, "expression");
403 		InputSource resolvedSource = Objects.requireNonNull(source, "source");
404 		try (AVM activeEvalAvm = createAvm(settings)) {
405 			return activeEvalAvm.eval(compiledExpression, resolvedSource);
406 		}
407 	}
408 
409 	/**
410 	 * Compiles the specified AWK script and returns an immutable AWK program.
411 	 *
412 	 * @param script AWK script to compile
413 	 * @param disableOptimizeParam {@code true} to skip tuple optimization
414 	 * @return compiled immutable program
415 	 * @throws IOException if an I/O error occurs during compilation
416 	 */
417 	AwkProgram compile(String script, boolean disableOptimizeParam) throws IOException {
418 		ScriptSource source = new ScriptSource(
419 				ScriptSource.DESCRIPTION_COMMAND_LINE_SCRIPT,
420 				new StringReader(script));
421 		return compile(Collections.singletonList(source), disableOptimizeParam);
422 	}
423 
424 	/**
425 	 * Compiles the specified AWK script and returns an immutable AWK program.
426 	 *
427 	 * @param script AWK script to compile (as a {@link Reader})
428 	 * @param disableOptimizeParam {@code true} to skip tuple optimization
429 	 * @return compiled immutable program
430 	 * @throws IOException if an I/O error occurs during compilation
431 	 */
432 	AwkProgram compile(Reader script, boolean disableOptimizeParam) throws IOException {
433 		ScriptSource source = new ScriptSource(
434 				ScriptSource.DESCRIPTION_COMMAND_LINE_SCRIPT,
435 				script);
436 		return compile(Collections.singletonList(source), disableOptimizeParam);
437 	}
438 
439 	/**
440 	 * Compiles a list of script sources into an immutable AWK program that can be
441 	 * executed by the {@link AVM} runtime.
442 	 *
443 	 * @param scripts script sources to compile
444 	 * @return compiled immutable program
445 	 * @throws IOException if an I/O error occurs while reading the
446 	 *         scripts
447 	 */
448 	public AwkProgram compile(List<ScriptSource> scripts)
449 			throws IOException {
450 		return compile(scripts, false);
451 	}
452 
453 	/**
454 	 * Compiles a list of script sources into an immutable AWK program that can be
455 	 * executed by the {@link AVM} runtime.
456 	 *
457 	 * @param scripts script sources to compile
458 	 * @param disableOptimizeParam {@code true} to skip tuple optimization
459 	 * @return compiled immutable program
460 	 * @throws IOException if an I/O error occurs while reading the
461 	 *         scripts
462 	 */
463 	public AwkProgram compile(List<ScriptSource> scripts, boolean disableOptimizeParam)
464 			throws IOException {
465 		return compileProgram(scripts, disableOptimizeParam, new AwkProgram());
466 	}
467 
468 	/**
469 	 * Compiles a full AWK program into the supplied tuple implementation.
470 	 *
471 	 * @param scripts script sources to compile
472 	 * @param disableOptimizeParam {@code true} to skip tuple optimization
473 	 * @param tuples destination tuple implementation
474 	 * @param <T> concrete tuple type to populate
475 	 * @return the populated compiled program
476 	 * @throws IOException if reading script sources fails
477 	 */
478 	protected final <T extends AwkProgram> T compileProgram(
479 			List<ScriptSource> scripts,
480 			boolean disableOptimizeParam,
481 			T tuples)
482 			throws IOException {
483 		lastAst = null;
484 		if (!scripts.isEmpty()) {
485 			// Parse all script sources into a single AST
486 			AwkParser parser = new AwkParser(this.extensionFunctions, settings.isAllowArraysOfArrays());
487 			AstNode ast = parser.parse(scripts);
488 			lastAst = ast;
489 			if (ast != null) {
490 				// Perform semantic checks twice to resolve forward references
491 				ast.semanticAnalysis();
492 				ast.semanticAnalysis();
493 				// Build tuples from the AST
494 				ast.populateTuples(tuples);
495 				// Assign addresses and prepare tuples for interpretation
496 				tuples.postProcess();
497 				if (!disableOptimizeParam) {
498 					tuples.optimize();
499 				}
500 				// Record global variable offset mappings for the interpreter
501 				parser.populateGlobalVariableNameToOffsetMappings(tuples);
502 			}
503 		}
504 		tuples.freezeMetadata();
505 
506 		return tuples;
507 	}
508 
509 	/**
510 	 * Compile an expression to evaluate (not a full script).
511 	 *
512 	 * @param expression AWK expression to compile
513 	 * @return compiled immutable expression
514 	 * @throws IOException if anything goes wrong with the compilation
515 	 */
516 	public AwkExpression compileExpression(String expression) throws IOException {
517 		return compileExpression(expression, false);
518 	}
519 
520 	/**
521 	 * Compile an expression to evaluate (not a full script).
522 	 *
523 	 * @param expression AWK expression to compile
524 	 * @param disableOptimizeParam {@code true} to skip tuple optimization
525 	 * @return compiled immutable expression
526 	 * @throws IOException if anything goes wrong with the compilation
527 	 */
528 	public AwkExpression compileExpression(String expression, boolean disableOptimizeParam) throws IOException {
529 		return compileExpression(expression, disableOptimizeParam, new AwkExpression());
530 	}
531 
532 	/**
533 	 * Compiles an AWK expression into the supplied tuple implementation.
534 	 *
535 	 * @param expression expression source to compile
536 	 * @param disableOptimizeParam {@code true} to skip tuple optimization
537 	 * @param tuples destination tuple implementation
538 	 * @param <T> concrete tuple type to populate
539 	 * @return the populated compiled expression
540 	 * @throws IOException if reading the expression fails
541 	 */
542 	protected final <T extends AwkExpression> T compileExpression(
543 			String expression,
544 			boolean disableOptimizeParam,
545 			T tuples)
546 			throws IOException {
547 		// Create a ScriptSource
548 		ScriptSource expressionSource = new ScriptSource(
549 				ScriptSource.DESCRIPTION_COMMAND_LINE_SCRIPT,
550 				new StringReader(expression));
551 
552 		// Parse the expression
553 		AwkParser parser = new AwkParser(this.extensionFunctions, settings.isAllowArraysOfArrays());
554 		AstNode ast = parser.parseExpression(expressionSource);
555 
556 		// Attempt to traverse the syntax tree and build
557 		// the intermediate code
558 		if (ast != null) {
559 			// 1st pass to tie actual parameters to back-referenced formal parameters
560 			ast.semanticAnalysis();
561 			// 2nd pass to tie actual parameters to forward-referenced formal parameters
562 			ast.semanticAnalysis();
563 			// build tuples
564 			ast.populateTuples(tuples);
565 			// Calls touch(...) per Tuple so that addresses can be normalized/assigned/allocated
566 			tuples.postProcess();
567 			if (!disableOptimizeParam) {
568 				tuples.optimize();
569 			}
570 			// record global_var -> offset mapping into the tuples
571 			// so that the interpreter can assign variables
572 			parser.populateGlobalVariableNameToOffsetMappings(tuples);
573 		}
574 		tuples.freezeMetadata();
575 
576 		return tuples;
577 	}
578 
579 	/**
580 	 * Evaluates the specified AWK expression (not a full script, just an expression)
581 	 * and returns the value of this expression.
582 	 *
583 	 * @param expression Expression to evaluate (e.g. <code>2+3</code>)
584 	 * @return the value of the specified expression
585 	 * @throws IOException if anything goes wrong with the evaluation
586 	 */
587 	public Object eval(String expression) throws IOException {
588 		return eval(compileExpression(expression));
589 	}
590 
591 	/**
592 	 * Evaluates the specified AWK expression (not a full script, just an expression)
593 	 * and returns the value of this expression.
594 	 *
595 	 * @param expression Expression to evaluate (e.g. <code>2+3</code> or <code>$2 "-" $3</code>
596 	 * @param input Optional text input (that will be available as $0, and tokenized as $1, $2, etc.)
597 	 * @return the value of the specified expression
598 	 * @throws IOException if anything goes wrong with the evaluation
599 	 */
600 	public Object eval(String expression, String input) throws IOException {
601 		return eval(compileExpression(expression), input);
602 	}
603 
604 	/**
605 	 * Evaluates the specified AWK expression using a structured {@link InputSource}
606 	 * to populate {@code $0}, {@code $1}, etc.
607 	 *
608 	 * @param expression Expression to evaluate (e.g. {@code $2 "-" $3})
609 	 * @param source structured input source providing the current record
610 	 * @return the value of the specified expression
611 	 * @throws IOException if anything goes wrong with the evaluation
612 	 */
613 	public Object eval(String expression, InputSource source) throws IOException {
614 		return eval(compileExpression(expression), source);
615 	}
616 
617 	/**
618 	 * Prepares one text record for repeated expression evaluation and returns the
619 	 * mutable {@link AVM} that will execute those expressions.
620 	 * <p>
621 	 * The returned {@link AVM} is created using the current runtime
622 	 * configuration of this {@link Awk} instance and binds the provided record
623 	 * once. Later calls to
624 	 * {@link AVM#eval(AwkExpression)} reuse the same AVM state without resetting it
625 	 * between expressions, so mutations intentionally leak across evaluations.
626 	 * This is the high-level convenience wrapper around direct
627 	 * {@link AVM#prepareForEval(String)} and {@link AVM#eval(AwkExpression)} usage.
628 	 * </p>
629 	 *
630 	 * @param input non-null text record to expose as {@code $0}
631 	 *        Call {@link AVM#close()} when you are done with the returned interpreter.
632 	 * @return prepared AVM ready for repeated {@link AVM#eval(AwkExpression)} calls
633 	 * @throws IOException if binding the record fails
634 	 */
635 	public AVM prepareEval(String input) throws IOException {
636 		String resolvedInput = Objects.requireNonNull(input, "input");
637 		AVM evalAvm = createAvm(settings);
638 		try {
639 			evalAvm.prepareForEval(resolvedInput);
640 			return evalAvm;
641 		} catch (IOException | RuntimeException e) {
642 			try {
643 				evalAvm.close();
644 			} catch (IOException closeException) {
645 				e.addSuppressed(closeException);
646 			}
647 			throw e;
648 		}
649 	}
650 
651 	/**
652 	 * Prepares the first available record from a structured {@link InputSource}
653 	 * for repeated expression evaluation and returns the mutable {@link AVM}
654 	 * that will execute those expressions.
655 	 * <p>
656 	 * The returned AVM remains attached to the provided source, so later
657 	 * {@code getline} operations and repeated {@link AVM#prepareForEval(InputSource)}
658 	 * calls continue from that source's current position. Later
659 	 * {@link AVM#eval(AwkExpression)} calls reuse the same AVM state without
660 	 * resetting it between expressions, so mutations intentionally leak across
661 	 * evaluations. Close the returned AVM when you are done with it to release
662 	 * any bound input or runtime I/O resources.
663 	 * </p>
664 	 *
665 	 * @param source structured source providing the record to bind
666 	 * @return prepared AVM ready for repeated {@link AVM#eval(AwkExpression)} calls
667 	 * @throws IOException if reading the record fails or the source is exhausted
668 	 */
669 	public AVM prepareEval(InputSource source) throws IOException {
670 		InputSource resolvedSource = Objects.requireNonNull(source, "source");
671 		AVM evalAvm = createAvm(settings);
672 		try {
673 			if (!evalAvm.prepareForEval(resolvedSource)) {
674 				throw new IOException("No record available from source.");
675 			}
676 			return evalAvm;
677 		} catch (IOException | RuntimeException e) {
678 			try {
679 				evalAvm.close();
680 			} catch (IOException closeException) {
681 				e.addSuppressed(closeException);
682 			}
683 			throw e;
684 		}
685 	}
686 
	/**
	 * Creates an {@link AVM} using the provided runtime settings, with
	 * profiling disabled.
	 *
	 * @param settingsParam runtime settings to apply
	 * @return reusable AVM
	 */
	protected AVM createAvm(AwkSettings settingsParam) {
		return createAvm(settingsParam, false);
	}
696 
	/**
	 * Creates an {@link AVM} using the provided runtime settings and profiling
	 * mode. The extension instances of this engine are passed to the new AVM.
	 *
	 * @param settingsParam runtime settings to apply
	 * @param profilingEnabled whether runtime profiling should be enabled
	 * @return reusable AVM
	 */
	protected AVM createAvm(AwkSettings settingsParam, boolean profilingEnabled) {
		return new AVM(settingsParam, this.extensionInstances, profilingEnabled);
	}
708 
709 	/**
710 	 * Converts a text input into an {@link InputStream} using UTF-8 encoding.
711 	 */
712 	private static InputStream toInputStream(String input) {
713 		if (input == null) {
714 			return new ByteArrayInputStream(new byte[0]);
715 		}
716 		return new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
717 	}
718 
719 	/**
720 	 * Fluent builder for configuring and executing an AWK script or program.
721 	 * <p>
722 	 * Obtain an instance through {@link Awk#script(String)} or
723 	 * {@link Awk#script(AwkProgram)}, configure input, arguments, and
724 	 * variables, then call one of the terminal methods to execute.
725 	 * </p>
726 	 *
727 	 * <pre>{@code
728 	 * // Execute and capture printed output as a String
729 	 * String result = awk.script("{ print toupper($0) }").input("hello").execute();
730 	 *
731 	 * // Execute to a specific stream
732 	 * awk.script(program).input(stream).execute(outputStream);
733 	 *
734 	 * // Execute with a custom sink
735 	 * awk.script("{ print $1 }").input(source).execute(mySink);
736 	 *
737 	 * // Execute to an appendable
738 	 * awk.script("{ print $1 }").input(source).execute(appendable);
739 	 * }</pre>
740 	 */
741 	public final class AwkRunBuilder {
742 
		/** Precompiled program to run; {@code null} when the builder works from script text. */
		private AwkProgram compiledProgram;
		/** Script texts added through {@link #script(String)}, in the order they were added. */
		private List<String> scripts;
		/** Byte-stream input set through {@code input(String)} or {@code input(InputStream)}. */
		private InputStream inputStream;
		/** Structured input set through {@code input(InputSource)}. */
		private InputSource inputSource;
		/** Runtime arguments set through the {@code arguments(...)} and {@code argument(...)} methods. */
		private List<String> arguments;
		/** Per-call variable overrides set through {@code variables(...)} and {@code variable(...)}. */
		private Map<String, Object> variableOverrides;
		/** Stream set through {@code errorStream(PrintStream)}; {@code null} when not set. */
		private PrintStream errorStream;
750 
		/** Creates a builder with no program; scripts are added through {@link #script(String)}. */
		AwkRunBuilder() {}
752 
		/**
		 * Creates a builder bound to a precompiled program.
		 *
		 * @param program compiled program to run
		 */
		AwkRunBuilder(AwkProgram program) {
			this.compiledProgram = program;
		}
756 
757 		/**
758 		 * Appends an additional AWK script to compile and execute.
759 		 * Multiple scripts are concatenated, like multiple {@code -f} options
760 		 * in the CLI.
761 		 *
762 		 * @param scriptText AWK program source
763 		 * @return this builder
764 		 * @throws IllegalStateException if a precompiled program was already set
765 		 */
766 		public AwkRunBuilder script(String scriptText) {
767 			if (compiledProgram != null) {
768 				throw new IllegalStateException("Cannot add scripts when a precompiled program is set");
769 			}
770 			if (scripts == null) {
771 				scripts = new ArrayList<String>();
772 			}
773 			scripts.add(Objects.requireNonNull(scriptText, "script"));
774 			return this;
775 		}
776 
777 		/**
778 		 * Sets the text input to process.
779 		 *
780 		 * @param input text input (encoded as UTF-8 internally)
781 		 * @return this builder
782 		 */
783 		public AwkRunBuilder input(String input) {
784 			this.inputStream = toInputStream(input);
785 			return this;
786 		}
787 
		/**
		 * Sets the byte-stream input to process.
		 * <p>
		 * The stream is stored as given; it is not copied or wrapped here.
		 * </p>
		 *
		 * @param input byte stream, or {@code null} for no input
		 * @return this builder
		 */
		public AwkRunBuilder input(InputStream input) {
			this.inputStream = input;
			return this;
		}
798 
		/**
		 * Sets a structured {@link InputSource} to process.
		 * <p>
		 * The source is stored as given; no {@code null} check is performed
		 * here.
		 * </p>
		 *
		 * @param source structured record source
		 * @return this builder
		 */
		public AwkRunBuilder input(InputSource source) {
			this.inputSource = source;
			return this;
		}
809 
		/**
		 * Sets runtime arguments visible through {@code ARGC}/{@code ARGV}.
		 * <p>
		 * The list is stored by reference, not copied, and replaces any
		 * arguments set earlier on this builder.
		 * </p>
		 *
		 * @param args runtime arguments
		 * @return this builder
		 */
		@SuppressFBWarnings("EI_EXPOSE_REP2")
		public AwkRunBuilder arguments(List<String> args) {
			this.arguments = args;
			return this;
		}
821 
822 		/**
823 		 * Sets runtime arguments visible through {@code ARGC}/{@code ARGV}.
824 		 *
825 		 * @param args runtime arguments
826 		 * @return this builder
827 		 */
828 		public AwkRunBuilder arguments(String... args) {
829 			this.arguments = Arrays.asList(args);
830 			return this;
831 		}
832 
833 		/**
834 		 * Adds a single runtime argument visible through {@code ARGC}/{@code ARGV}.
835 		 *
836 		 * @param arg runtime argument
837 		 * @return this builder
838 		 */
839 		public AwkRunBuilder argument(String arg) {
840 			if (this.arguments == null) {
841 				this.arguments = new ArrayList<String>();
842 			}
843 			this.arguments.add(Objects.requireNonNull(arg, "arg"));
844 			return this;
845 		}
846 
847 		/**
848 		 * Sets the stream used for the stderr output of spawned processes
849 		 * (e.g.&nbsp;{@code system("...")}).
850 		 * <p>
851 		 * When not set, process stderr is merged into the main output sink.
852 		 * The CLI sets this explicitly to {@code System.err} so that command
853 		 * errors appear on the console rather than being mixed with normal output.
854 		 *
855 		 * @param stream stream to receive process stderr
856 		 * @return this builder
857 		 */
858 		public AwkRunBuilder errorStream(PrintStream stream) {
859 			this.errorStream = Objects.requireNonNull(stream, "errorStream");
860 			return this;
861 		}
862 
863 		/**
864 		 * Sets per-call variable overrides applied on top of the settings-level
865 		 * variables.
866 		 *
867 		 * @param overrides variable assignments (may be {@code null})
868 		 * @return this builder
869 		 */
870 		@SuppressFBWarnings("EI_EXPOSE_REP2")
871 		public AwkRunBuilder variables(Map<String, Object> overrides) {
872 			this.variableOverrides = overrides;
873 			return this;
874 		}
875 
876 		/**
877 		 * Sets a single per-call variable override.
878 		 *
879 		 * @param name variable name
880 		 * @param value variable value
881 		 * @return this builder
882 		 */
883 		public AwkRunBuilder variable(String name, Object value) {
884 			if (this.variableOverrides == null) {
885 				this.variableOverrides = new LinkedHashMap<String, Object>();
886 			}
887 			this.variableOverrides
888 					.put(
889 							Objects.requireNonNull(name, "name"),
890 							value);
891 			return this;
892 		}
893 
894 		/**
895 		 * Executes the script and returns the printed output as a {@link String}.
896 		 *
897 		 * @return printed output
898 		 * @throws IOException if compilation or execution fails
899 		 * @throws ExitException if the script terminates with a non-zero exit code
900 		 */
901 		public String execute() throws IOException, ExitException {
902 			StringBuilder output = new StringBuilder();
903 			doExecute(new AppendableAwkSink(output, settings.getLocale()));
904 			return output.toString();
905 		}
906 
907 		/**
908 		 * Executes the script, sending output to the specified {@link AwkSink}.
909 		 *
910 		 * @param sink output sink
911 		 * @throws IOException if compilation or execution fails
912 		 * @throws ExitException if the script terminates with a non-zero exit code
913 		 */
914 		public void execute(AwkSink sink) throws IOException, ExitException {
915 			doExecute(Objects.requireNonNull(sink, "sink"));
916 		}
917 
918 		/**
919 		 * Executes the script, sending output to the specified {@link PrintStream}.
920 		 *
921 		 * @param out print stream (e.g. {@code System.out})
922 		 * @throws IOException if compilation or execution fails
923 		 * @throws ExitException if the script terminates with a non-zero exit code
924 		 */
925 		public void execute(PrintStream out) throws IOException, ExitException {
926 			Objects.requireNonNull(out, "out");
927 			doExecute(new OutputStreamAwkSink(out, settings.getLocale()));
928 		}
929 
930 		/**
931 		 * Executes the script, sending output to the specified {@link OutputStream}.
932 		 *
933 		 * @param out output stream
934 		 * @throws IOException if compilation or execution fails
935 		 * @throws ExitException if the script terminates with a non-zero exit code
936 		 */
937 		public void execute(OutputStream out) throws IOException, ExitException {
938 			doExecute(new OutputStreamAwkSink(toPrintStream(out), settings.getLocale()));
939 		}
940 
941 		/**
942 		 * Executes the script, sending output to the specified {@link Appendable}
943 		 * (such as {@link StringBuilder} or {@link java.io.StringWriter}).
944 		 *
945 		 * @param appendable output destination
946 		 * @throws IOException if compilation or execution fails
947 		 * @throws ExitException if the script terminates with a non-zero exit code
948 		 */
949 		public void execute(Appendable appendable) throws IOException, ExitException {
950 			doExecute(
951 					new AppendableAwkSink(
952 							Objects.requireNonNull(appendable, "appendable"),
953 							settings.getLocale()));
954 		}
955 
956 		private void doExecute(AwkSink sink) throws IOException, ExitException {
957 			AwkProgram program = resolveProgram();
958 			List<String> resolvedArguments = arguments == null ? Collections.<String>emptyList() : arguments;
959 			try (AVM avm = createAvm(settings)) {
960 				avm.setAwkSink(sink);
961 				avm.setErrorStream(errorStream != null ? errorStream : sink.getPrintStream());
962 				try {
963 					InputSource resolvedSource;
964 					if (inputSource != null) {
965 						resolvedSource = inputSource;
966 					} else {
967 						InputStream in = inputStream != null ? inputStream : new ByteArrayInputStream(new byte[0]);
968 						resolvedSource = new StreamInputSource(in, avm, avm.getJrt());
969 					}
970 					avm.execute(program, resolvedSource, resolvedArguments, variableOverrides);
971 				} catch (ExitException e) {
972 					if (e.getCode() != 0) {
973 						throw e;
974 					}
975 				} finally {
976 					sink.flush();
977 				}
978 			}
979 		}
980 
981 		private AwkProgram resolveProgram() throws IOException {
982 			if (compiledProgram != null) {
983 				return compiledProgram;
984 			}
985 			if (scripts == null || scripts.isEmpty()) {
986 				throw new IllegalStateException("No script or program specified");
987 			}
988 			if (scripts.size() == 1) {
989 				return compile(scripts.get(0));
990 			}
991 			List<ScriptSource> sources = new ArrayList<ScriptSource>(scripts.size());
992 			for (int i = 0; i < scripts.size(); i++) {
993 				sources
994 						.add(
995 								new ScriptSource(
996 										ScriptSource.DESCRIPTION_COMMAND_LINE_SCRIPT,
997 										new StringReader(scripts.get(i))));
998 			}
999 			return compile(sources);
1000 		}
1001 	}
1002 
1003 	private static PrintStream toPrintStream(OutputStream out) {
1004 		Objects.requireNonNull(out, "outputStream");
1005 		if (out instanceof PrintStream) {
1006 			return (PrintStream) out;
1007 		}
1008 		try {
1009 			return new PrintStream(out, false, "UTF-8");
1010 		} catch (java.io.UnsupportedEncodingException e) {
1011 			throw new IllegalStateException(e);
1012 		}
1013 	}
1014 
1015 	/**
1016 	 * Lists metadata for the {@link JawkExtension} implementations discovered on
1017 	 * the class path.
1018 	 *
1019 	 * @return list of discovered extension descriptors
1020 	 */
1021 	public static Map<String, JawkExtension> listAvailableExtensions() {
1022 		return ExtensionRegistry.listExtensions();
1023 	}
1024 
1025 	private static final class SingleRecordInputSource implements InputSource {
1026 
1027 		private final String record;
1028 
1029 		private boolean consumed;
1030 
1031 		private SingleRecordInputSource(String record) {
1032 			this.record = record;
1033 		}
1034 
1035 		@Override
1036 		public boolean nextRecord() {
1037 			if (consumed || record == null) {
1038 				return false;
1039 			}
1040 			consumed = true;
1041 			return true;
1042 		}
1043 
1044 		@Override
1045 		public String getRecordText() {
1046 			return consumed ? record : null;
1047 		}
1048 
1049 		@Override
1050 		public List<String> getFields() {
1051 			return null;
1052 		}
1053 
1054 		@Override
1055 		public boolean isFromFilenameList() {
1056 			return false;
1057 		}
1058 	}
1059 
1060 }