View Javadoc
1   package io.jawk.jrt;
2   
3   /*-
4    * ╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲
5    * Jawk
6    * ჻჻჻჻჻჻
7    * Copyright (C) 2006 - 2026 MetricsHub
8    * ჻჻჻჻჻჻
9    * This program is free software: you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation, either version 3 of the
12   * License, or (at your option) any later version.
13   *
14   * This program is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU General Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU General Lesser Public
20   * License along with this program.  If not, see
21   * <http://www.gnu.org/licenses/lgpl-3.0.html>.
22   * ╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱
23   */
24  
25  import java.io.Closeable;
26  import java.io.FileInputStream;
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.io.InputStreamReader;
30  import java.nio.charset.StandardCharsets;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.Objects;
34  
35  /**
36   * An {@link InputSource} that reads records from an {@link InputStream},
37   * traversing the {@code ARGV} array to open filenames and apply
38   * {@code name=value} variable assignments exactly like the classic AWK
39   * command-line flow.
40   * <p>
41   * When no filename arguments are present in {@code ARGV}, records are read
42   * from the supplied default {@link InputStream} (usually {@code System.in}).
43   * This class is the default {@link InputSource} used internally by the
44   * runtime when no custom source has been configured via
45   * {@code AwkSettings#setInputSource(...)}.
46   * </p>
47   * <p>
48   * API note: this type is public to allow runtime wiring between packages, but
49   * it is considered an internal implementation detail. Embedding applications
50   * should implement {@link InputSource} directly rather than depend on this
51   * class, whose behavior may change in future releases.
52   * </p>
53   *
54   * @see InputSource
55   */
56  public class StreamInputSource implements InputSource, Closeable {
57  
58  	private final InputStream defaultInput;
59  	private final VariableManager vm;
60  	private final JRT jrt;
61  
62  	// ARGV traversal state
63  	private Map<Object, Object> arglistMap;
64  	private int arglistIdx;
65  	private int arglistMaxKey;
66  	private boolean hasFilenames;
67  
68  	// Current reader and record
69  	private PartitioningReader partitioningReader;
70  	private boolean currentFromFilenameList;
71  	private String currentRecord;
72  	private boolean currentReaderExhausted;
73  
74  	/**
75  	 * Creates a stream-backed input source.
76  	 *
77  	 * @param defaultInput the fallback input stream used when {@code ARGV}
78  	 *        contains no filename arguments (typically {@code System.in})
79  	 * @param vm the variable manager providing access to {@code ARGV} and
80  	 *        {@code ARGC}
81  	 * @param jrt the JRT instance used for string conversion and special
82  	 *        variable updates
83  	 */
84  	public StreamInputSource(InputStream defaultInput, VariableManager vm, JRT jrt) {
85  		this.defaultInput = Objects.requireNonNull(defaultInput, "defaultInput");
86  		this.vm = Objects.requireNonNull(vm, "vm");
87  		this.jrt = Objects.requireNonNull(jrt, "jrt");
88  	}
89  
90  	/** {@inheritDoc} */
91  	@Override
92  	public boolean nextRecord() throws IOException {
93  		initializeArgList();
94  
95  		while (true) {
96  			if (partitioningReader == null || currentReaderExhausted) {
97  				if (!prepareNextReader()) {
98  					return false;
99  				}
100 				currentReaderExhausted = false;
101 			}
102 
103 			String nextRecord = partitioningReader.readRecord();
104 			if (nextRecord != null) {
105 				currentRecord = nextRecord;
106 				currentFromFilenameList = partitioningReader.fromFilenameList();
107 				return true;
108 			}
109 			if (!partitioningReader.fromFilenameList()) {
110 				return false;
111 			}
112 			currentReaderExhausted = true;
113 		}
114 	}
115 
116 	/** {@inheritDoc} */
117 	@Override
118 	public String getRecordText() {
119 		return currentRecord;
120 	}
121 
122 	/**
123 	 * Always returns {@code null} so that the runtime splits {@code $0} using
124 	 * the current field separator (FS).
125 	 *
126 	 * @return {@code null}
127 	 */
128 	@Override
129 	public List<String> getFields() {
130 		return null;
131 	}
132 
133 	/** {@inheritDoc} */
134 	@Override
135 	public boolean isFromFilenameList() {
136 		return currentFromFilenameList;
137 	}
138 
139 	/**
140 	 * Propagates a record-separator change to the active
141 	 * {@link PartitioningReader}.
142 	 *
143 	 * @param rs the new record separator value
144 	 */
145 	public void setRecordSeparator(String rs) {
146 		if (partitioningReader != null) {
147 			partitioningReader.setRecordSeparator(rs);
148 		}
149 	}
150 
151 	/**
152 	 * Returns the underlying {@link PartitioningReader} currently in use, or
153 	 * {@code null} if no reader has been opened yet.
154 	 *
155 	 * @return the active reader, or {@code null}
156 	 */
157 	PartitioningReader getPartitioningReader() {
158 		return partitioningReader;
159 	}
160 
161 	// ------------------------------------------------------------------
162 	// ARGV traversal logic (moved from JRT)
163 	// ------------------------------------------------------------------
164 
165 	/**
166 	 * Initialize internal state for traversing {@code ARGV}.
167 	 */
168 	private void initializeArgList() {
169 		if (arglistMap != null) {
170 			return;
171 		}
172 		arglistMap = toArgvMap(vm.getARGV());
173 		arglistMaxKey = computeMaxArgvKey();
174 		arglistIdx = 1;
175 		hasFilenames = detectFilenames();
176 	}
177 
178 	private Map<Object, Object> toArgvMap(Object argv) {
179 		if (!(argv instanceof Map)) {
180 			throw new IllegalArgumentException("ARGV must be a Map.");
181 		}
182 		@SuppressWarnings("unchecked")
183 		Map<Object, Object> argvMap = (Map<Object, Object>) argv;
184 		return argvMap;
185 	}
186 
187 	/**
188 	 * Compute the highest numeric key present in the current {@code arglistMap}.
189 	 *
190 	 * @return the maximum integer key, or {@code 0} when the array is empty
191 	 */
192 	private int computeMaxArgvKey() {
193 		int max = 0;
194 		for (Object key : arglistMap.keySet()) {
195 			int idx = (int) JRT.toLong(key);
196 			if (idx > max) {
197 				max = idx;
198 			}
199 		}
200 		return max;
201 	}
202 
203 	/**
204 	 * Determine whether {@code ARGV} contains any filename entries (arguments
205 	 * without an equals sign).
206 	 *
207 	 * @return {@code true} if at least one filename was found
208 	 */
209 	private boolean detectFilenames() {
210 		int traversalArgCount = getTraversalArgCount();
211 		boolean found = false;
212 		for (int i = 1; i < traversalArgCount && !found; i++) {
213 			Object argValue = getArgvValue(i);
214 			if (argValue == MISSING_ARGV_VALUE) {
215 				continue;
216 			}
217 			String arg = jrt.toAwkString(argValue);
218 			if (arg.isEmpty() || arg.indexOf('=') > 0) {
219 				continue;
220 			}
221 			found = true;
222 		}
223 		return found;
224 	}
225 
226 	/**
227 	 * Retrieve the number of command-line arguments supplied to the script.
228 	 *
229 	 * @return {@code ARGC} converted to an {@code int}
230 	 */
231 	private int getArgCount() {
232 		long raw = JRT.toLong(vm.getARGC());
233 		if (raw <= 0) {
234 			return 0;
235 		}
236 		if (raw > Integer.MAX_VALUE) {
237 			return Integer.MAX_VALUE;
238 		}
239 		return (int) raw;
240 	}
241 
242 	/**
243 	 * Return the effective upper bound for ARGV traversal, capped by the
244 	 * highest known ARGV key so that absurdly large ARGC values do not
245 	 * cause unbounded iteration over missing entries.
246 	 *
247 	 * @return the capped traversal count
248 	 */
249 	private int getTraversalArgCount() {
250 		int argCount = getArgCount();
251 		if (argCount <= 0) {
252 			return 0;
253 		}
254 		return Math.min(argCount, arglistMaxKey + 1);
255 	}
256 
257 	/**
258 	 * Obtain the next valid argument from {@code ARGV}, skipping
259 	 * uninitialized or empty entries.
260 	 *
261 	 * @return the next argument as an AWK string, or {@code null} if none
262 	 *         remain
263 	 */
264 	private String nextArgument() {
265 		int traversalArgCount = getTraversalArgCount();
266 		while (arglistIdx < traversalArgCount) {
267 			int idx = arglistIdx++;
268 			Object argValue = getArgvValue(idx);
269 			if (argValue == MISSING_ARGV_VALUE) {
270 				continue;
271 			}
272 			String arg = jrt.toAwkString(argValue);
273 			if (!arg.isEmpty()) {
274 				return arg;
275 			}
276 		}
277 		return null;
278 	}
279 
280 	private static final Object MISSING_ARGV_VALUE = new Object();
281 
282 	private Object getArgvValue(int index) {
283 		Long longIndex = Long.valueOf(index);
284 		if (arglistMap instanceof AssocArray) {
285 			return JRT.containsAwkKey(arglistMap, longIndex) ? JRT.getAwkValue(arglistMap, longIndex) : MISSING_ARGV_VALUE;
286 		}
287 		if (arglistMap.containsKey(longIndex)) {
288 			return arglistMap.get(longIndex);
289 		}
290 		Integer intIndex = Integer.valueOf(index);
291 		if (arglistMap.containsKey(intIndex)) {
292 			return arglistMap.get(intIndex);
293 		}
294 		for (Map.Entry<Object, Object> entry : arglistMap.entrySet()) {
295 			Object key = entry.getKey();
296 			if (!(key instanceof Number)) {
297 				continue;
298 			}
299 			double numericKey = ((Number) key).doubleValue();
300 			if (JRT.isActuallyLong(numericKey) && ((long) Math.rint(numericKey)) == index) {
301 				return entry.getValue();
302 			}
303 		}
304 		return MISSING_ARGV_VALUE;
305 	}
306 
307 	/**
308 	 * Prepare the {@link PartitioningReader} for the next input source. This
309 	 * may be a filename, a variable assignment, or standard input if no
310 	 * filenames remain.
311 	 *
312 	 * @return {@code true} if a reader was prepared, {@code false} if no more
313 	 *         input is available
314 	 * @throws IOException if an I/O error occurs while opening a file
315 	 */
316 	private boolean prepareNextReader() throws IOException {
317 		boolean ready = false;
318 		arglistMaxKey = computeMaxArgvKey();
319 		hasFilenames = detectFilenames();
320 		while (!ready) {
321 			String arg = nextArgument();
322 			if (arg == null) {
323 				// ARGC/ARGV may have changed while evaluating assignments.
324 				hasFilenames = detectFilenames();
325 				if (partitioningReader == null && !hasFilenames) {
326 					partitioningReader = new PartitioningReader(
327 							new InputStreamReader(defaultInput, StandardCharsets.UTF_8),
328 							jrt.getRSString());
329 					jrt.setFILENAMEViaJrt("");
330 					return true;
331 				}
332 				closeCurrentReaderIfFileStream();
333 				return false;
334 			}
335 			if (arg.indexOf('=') > 0) {
336 				setFilelistVariable(arg);
337 				// Recompute bounds so ARGC changes are reflected immediately.
338 				arglistMaxKey = computeMaxArgvKey();
339 				hasFilenames = detectFilenames();
340 				if (partitioningReader == null && !hasFilenames) {
341 					partitioningReader = new PartitioningReader(
342 							new InputStreamReader(defaultInput, StandardCharsets.UTF_8),
343 							jrt.getRSString());
344 					jrt.setFILENAMEViaJrt("");
345 					return true;
346 				}
347 				if (partitioningReader != null) {
348 					jrt.setNR(jrt.getNR() + 1);
349 				}
350 			} else {
351 				closeCurrentReaderIfFileStream();
352 				partitioningReader = new PartitioningReader(
353 						new InputStreamReader(new FileInputStream(arg), StandardCharsets.UTF_8),
354 						jrt.getRSString(),
355 						true);
356 				jrt.setFILENAMEViaJrt(arg);
357 				jrt.setFNR(0L);
358 				ready = true;
359 			}
360 		}
361 		return true;
362 	}
363 
364 	/**
365 	 * Closes the current {@link PartitioningReader} if it wraps a file stream
366 	 * (not {@code defaultInput}). This prevents file-descriptor leaks when
367 	 * traversing multiple ARGV files.
368 	 */
369 	private void closeCurrentReaderIfFileStream() {
370 		if (partitioningReader != null && partitioningReader.fromFilenameList()) {
371 			try {
372 				partitioningReader.close();
373 			} catch (IOException ignored) {
374 				// Best-effort close; the file is no longer needed.
375 			}
376 		}
377 	}
378 
379 	/**
380 	 * Releases any open file-backed reader held by this source.
381 	 * <p>
382 	 * This method is idempotent and safe to call multiple times. It does
383 	 * <em>not</em> close the default input stream ({@code System.in}).
384 	 * </p>
385 	 *
386 	 * @throws IOException never thrown; signature required by {@link Closeable}
387 	 */
388 	@Override
389 	public void close() throws IOException {
390 		closeCurrentReaderIfFileStream();
391 	}
392 
393 	/**
394 	 * Parse a {@code name=value} argument from the command line and assign it
395 	 * to the corresponding AWK variable.
396 	 *
397 	 * @param nameValue argument in the form {@code name=value}
398 	 */
399 	private void setFilelistVariable(String nameValue) {
400 		int eqIdx = nameValue.indexOf('=');
401 		if (eqIdx == 0) {
402 			throw new IllegalArgumentException(
403 					"Must have a non-blank variable name in a name=value variable assignment argument.");
404 		}
405 		String name = nameValue.substring(0, eqIdx);
406 		String value = nameValue.substring(eqIdx + 1);
407 		Object obj;
408 		try {
409 			obj = Integer.parseInt(value);
410 		} catch (NumberFormatException nfe) {
411 			try {
412 				obj = Double.parseDouble(value);
413 			} catch (NumberFormatException nfe2) {
414 				obj = value;
415 			}
416 		}
417 		vm.assignVariable(name, obj);
418 	}
419 }