Skip to content

Commit 961802c

Browse files
committed
Use line-by-line reader for lock file detection
Per review feedback, avoid loading the entire file into memory when checking for the @explicit marker. The isLockFile method now takes a Path and uses a BufferedReader to read only the first 20 lines. Signed-off-by: Claude <noreply@anthropic.com>
1 parent c2829fc commit 961802c

File tree

2 files changed

+57
-48
lines changed

2 files changed

+57
-48
lines changed

modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy

Lines changed: 38 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -168,33 +168,33 @@ class CondaCache {
168168
}
169169

170170
/**
171-
* Check if the given content represents a conda lock file.
171+
* Check if the given file is a conda lock file.
172172
* A conda lock file contains "@EXPLICIT" marker in the first 20 lines.
173+
* This method reads only the first 20 lines to avoid loading the entire file into memory.
173174
*
174-
* @param content The file content to check
175+
* @param path The file path to check
175176
* @return true if this is a conda lock file, false otherwise
176177
*/
177178
@PackageScope
178-
boolean isLockFile(String content) {
179-
if( !content )
179+
boolean isLockFile(Path path) {
180+
if( path == null || !Files.exists(path) )
181+
return false
182+
try {
183+
path.withReader { reader ->
184+
String line
185+
int count = 0
186+
while( count < 20 && (line = reader.readLine()) != null ) {
187+
if( line.trim() == '@EXPLICIT' )
188+
return true
189+
count++
190+
}
191+
return false
192+
}
193+
}
194+
catch( Exception e ) {
195+
log.debug "Error checking if file is lock file: $path", e
180196
return false
181-
final lines = content.readLines()
182-
final limit = Math.min(20, lines.size())
183-
for( int i = 0; i < limit; i++ ) {
184-
if( lines[i].trim() == '@EXPLICIT' )
185-
return true
186197
}
187-
return false
188-
}
189-
190-
/**
191-
* Check if the given path is a remote HTTP/HTTPS URL
192-
* @param str The path string to check
193-
* @return true if it's an HTTP/HTTPS URL
194-
*/
195-
@PackageScope
196-
boolean isYamlUriPath(String str) {
197-
str?.startsWith('http://') || str?.startsWith('https://')
198198
}
199199

200200
/**
@@ -248,30 +248,24 @@ class CondaCache {
248248
// it's interpreted as user provided prefix directory
249249
else if( condaEnv.contains('/') ) {
250250
// check if it's a file path that might be a lock file
251-
final path = condaEnv as Path
252-
if( path.isDirectory() ) {
253-
if( path.fileSystem != FileSystems.default )
254-
throw new IllegalArgumentException("Conda prefix path must be a POSIX file path: $path")
255-
return path
251+
final prefix = condaEnv as Path
252+
if( prefix.isDirectory() ) {
253+
if( prefix.fileSystem != FileSystems.default )
254+
throw new IllegalArgumentException("Conda prefix path must be a POSIX file path: $prefix")
255+
return prefix
256256
}
257257
// it could be a file with a non-standard extension (e.g., .lock or no extension)
258-
if( Files.exists(path) ) {
259-
try {
260-
content = path.text
261-
// if it's a lock file, use it; otherwise treat as invalid
262-
if( !isLockFile(content) ) {
263-
throw new IllegalArgumentException("Conda environment file is not a valid lock file (missing @EXPLICIT marker): $condaEnv")
264-
}
265-
}
266-
catch( IllegalArgumentException e ) {
267-
throw e
268-
}
269-
catch( Exception e ) {
270-
throw new IllegalArgumentException("Error reading Conda environment file: $condaEnv -- Check the log file for details", e)
258+
if( Files.exists(prefix) ) {
259+
// if it's a lock file, read content for hashing (like YAML/text files); otherwise treat as invalid
260+
if( !isLockFile(prefix) ) {
261+
throw new IllegalArgumentException("Conda environment file is not a valid lock file (missing @EXPLICIT marker): $condaEnv")
271262
}
263+
// Note: file is read twice - once by isLockFile (first 20 lines only) and once here for full content.
264+
// This is intentional: isLockFile is memory-efficient for large files, while hashing needs full content.
265+
content = prefix.text
272266
}
273267
else {
274-
throw new IllegalArgumentException("Conda prefix path does not exist or is not a directory: $path")
268+
throw new IllegalArgumentException("Conda prefix path does not exist or is not a directory: $prefix")
275269
}
276270
}
277271
else if( condaEnv.contains('\n') ) {
@@ -319,6 +313,10 @@ class CondaCache {
319313
Paths.get(envFile).toAbsolutePath()
320314
}
321315

316+
@PackageScope boolean isYamlUriPath(String env) {
317+
env.startsWith('http://') || env.startsWith('https://')
318+
}
319+
322320
@PackageScope
323321
Path createLocalCondaEnv0(String condaEnv, Path prefixPath) {
324322
if( prefixPath.isDirectory() ) {
@@ -345,7 +343,7 @@ class CondaCache {
345343
// Check if it's a file path with non-standard extension that might be a lock file
346344
else if( condaEnv.contains('/') ) {
347345
final localPath = makeAbsolute(condaEnv)
348-
if( Files.exists(localPath) && isLockFile(localPath.text) ) {
346+
if( Files.exists(localPath) && isLockFile(localPath) ) {
349347
cmd = "${binaryName} create ${opts}--yes --quiet --prefix ${Escape.path(prefixPath)} --file ${Escape.path(localPath)}"
350348
}
351349
else {

modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package nextflow.conda
1818

1919
import java.nio.file.Files
20+
import java.nio.file.Path
2021
import java.nio.file.Paths
2122

2223
import nextflow.SysEnv
@@ -63,22 +64,32 @@ class CondaCacheTest extends Specification {
6364

6465
given:
6566
def cache = new CondaCache()
67+
def folder = Files.createTempDirectory('test')
6668

6769
expect:
6870
// Valid lock file content - @EXPLICIT marker present
69-
cache.isLockFile('@EXPLICIT\nhttps://conda.anaconda.org/conda-forge/linux-64/package-1.0.tar.bz2')
70-
cache.isLockFile('# This file may be used to create an environment\n@EXPLICIT\nhttps://url')
71-
cache.isLockFile('# comment\n# another comment\n@EXPLICIT\nhttps://url')
71+
isLockFileContent(cache, folder, '@EXPLICIT\nhttps://conda.anaconda.org/conda-forge/linux-64/package-1.0.tar.bz2')
72+
isLockFileContent(cache, folder, '# This file may be used to create an environment\n@EXPLICIT\nhttps://url')
73+
isLockFileContent(cache, folder, '# comment\n# another comment\n@EXPLICIT\nhttps://url')
7274
// With spaces/indentation
73-
cache.isLockFile(' @EXPLICIT \nhttps://url')
75+
isLockFileContent(cache, folder, ' @EXPLICIT \nhttps://url')
7476

7577
// Invalid lock file content - no @EXPLICIT marker
76-
!cache.isLockFile('foo=1.0')
77-
!cache.isLockFile('channels:\n - conda-forge\ndependencies:\n - bwa')
78-
!cache.isLockFile('')
78+
!isLockFileContent(cache, folder, 'foo=1.0')
79+
!isLockFileContent(cache, folder, 'channels:\n - conda-forge\ndependencies:\n - bwa')
80+
!isLockFileContent(cache, folder, '')
7981
!cache.isLockFile(null)
8082
// @EXPLICIT after 20 lines should not be detected
81-
!cache.isLockFile((1..25).collect { "# line $it" }.join('\n') + '\n@EXPLICIT')
83+
!isLockFileContent(cache, folder, (1..25).collect { "# line $it" }.join('\n') + '\n@EXPLICIT')
84+
85+
cleanup:
86+
folder?.deleteDir()
87+
}
88+
89+
private boolean isLockFileContent(CondaCache cache, Path folder, String content) {
90+
def file = folder.resolve("test-${System.nanoTime()}.lock")
91+
file.text = content
92+
return cache.isLockFile(file)
8293
}
8394

8495
def 'should detect yaml uri path' () {

0 commit comments

Comments
 (0)