Nixos Cannot run program "/bin/ls"

Hello,

I try to run pySpark with spark-nlp

during a “post installation” there is an error that /bin/ls does not exist.

  • What is the best way in nixos to solve that issue
    – would I only put a symlink ?
Py4JJavaError: An error occurred while calling o87.partitions.
: java.lang.RuntimeException: Error while running command to get file permissions : java.io.IOException: Cannot run program "/bin/ls": error=2, No such file or directory
	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)

There are two solutions:

  1. Patching, i.e. replace /bin/ls with “${coreutils}/bin/ls”, this require to build it as a package
  2. Provide the path with fhsuserenv i.e. https://gist.github.com/Mic92/b59054188c595e5652cacf50485583e0 provides a container-like environment where /bin/ls would be provided just for the nix-shell session.

Of course if you just want to get it quickly done and the its only needed for installation, than a symlink from /run/current-system/sw/bin/ls to /bin/ls could also serve as a temporary hack.

sorry, i think I have to be more precise:

The python env is created via mach-nix.


I cannot see in which package or library the “/bin/ls” is included

Import Spark NLP

from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp.pretrained import PretrainedPipeline
import sparknlp

spark = sparknlp.start()
pipeline = PretrainedPipeline(name= ‘explain_document_dl’, lang=‘en’ , )

explain_document_dl download started this may take some time.
Approx size to download 168.4 MB
[OK!]
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-39-ff889a0cd27d> in <module>
      1 # Download a pre-trained pipeline
----> 2 pipeline = PretrainedPipeline(name= 'explain_document_dl', lang='en' , )
      3 
      4 #pipeline = PretrainedPipeline(name= 'explain_document_dl', lang='en' , disk_location= dwld_models )

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/sparknlp/pretrained.py in __init__(self, name, lang, remote_loc, parse_embeddings, disk_location)
     89     def __init__(self, name, lang='en', remote_loc=None, parse_embeddings=False, disk_location=None):
     90         if not disk_location:
---> 91             self.model = ResourceDownloader().downloadPipeline(name, lang, remote_loc)
     92         else:
     93             self.model = PipelineModel.load(disk_location)

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/sparknlp/pretrained.py in downloadPipeline(name, language, remote_loc)
     58             t1.start()
     59             try:
---> 60                 j_obj = _internal._DownloadPipeline(name, language, remote_loc).apply()
     61                 jmodel = PipelineModel._from_java(j_obj)
     62             finally:

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/sparknlp/internal.py in __init__(self, name, language, remote_loc)
    179 class _DownloadPipeline(ExtendedJavaWrapper):
    180     def __init__(self, name, language, remote_loc):
--> 181         super(_DownloadPipeline, self).__init__("com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.downloadPipeline", name, language, remote_loc)
    182 
    183 

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/sparknlp/internal.py in __init__(self, java_obj, *args)
    127         super(ExtendedJavaWrapper, self).__init__(java_obj)
    128         self.sc = SparkContext._active_spark_context
--> 129         self._java_obj = self.new_java_obj(java_obj, *args)
    130         self.java_obj = self._java_obj
    131 

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/sparknlp/internal.py in new_java_obj(self, java_class, *args)
    137 
    138     def new_java_obj(self, java_class, *args):
--> 139         return self._new_java_obj(java_class, *args)
    140 
    141     def new_java_array(self, pylist, java_class):

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
     65             java_obj = getattr(java_obj, name)
     66         java_args = [_py2java(sc, arg) for arg in args]
---> 67         return java_obj(*java_args)
     68 
     69     @staticmethod

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258 
   1259         for temp_arg in temp_args:

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/nix/store/i72461v4c9wlgln0mn6zjc0gyjl5zcyi-python3-3.7.8-env/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(

Py4JJavaError: An error occurred while calling z:com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.downloadPipeline.
: java.lang.RuntimeException: Error while running command to get file permissions : java.io.IOException: Cannot run program "/bin/ls": error=2, No such file or directory
	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
	at org.apache.hadoop.util.Shell.runCommand(Shell.java:523)
	at org.apache.hadoop.util.Shell.run(Shell.java:479)
	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
	at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
	at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
	at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:659)
	at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:634)
	at org.apache.hadoop.fs.LocatedFileStatus.<init>(LocatedFileStatus.java:49)
	at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1733)
	at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1713)
	at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:270)
	at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:229)
	at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
	at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:204)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:273)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:269)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:269)
	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:273)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:269)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:269)
	at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1388)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.RDD.take(RDD.scala:1382)
	at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1423)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.RDD.first(RDD.scala:1422)
	at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:615)
	at org.apache.spark.ml.Pipeline$SharedReadWrite$.load(Pipeline.scala:267)
	at org.apache.spark.ml.PipelineModel$PipelineModelReader.load(Pipeline.scala:348)
	at org.apache.spark.ml.PipelineModel$PipelineModelReader.load(Pipeline.scala:342)
	at com.johnsnowlabs.nlp.pretrained.ResourceDownloader$.downloadPipeline(ResourceDownloader.scala:376)
	at com.johnsnowlabs.nlp.pretrained.ResourceDownloader$.downloadPipeline(ResourceDownloader.scala:370)
	at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader$.downloadPipeline(ResourceDownloader.scala:470)
	at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.downloadPipeline(ResourceDownloader.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: error=2, No such file or directory
	at java.lang.UNIXProcess.forkAndExec(Native Method)
	at java.lang.UNIXProcess.<init>(UNIXProcess.java:247)
	at java.lang.ProcessImpl.start(ProcessImpl.java:134)
	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
	... 53 more

	at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:699)
	at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:634)
	at org.apache.hadoop.fs.LocatedFileStatus.<init>(LocatedFileStatus.java:49)
	at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1733)
	at org.apache.hadoop.fs.FileSystem$4.next(FileSystem.java:1713)
	at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:270)
	at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:229)
	at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
	at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:204)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:273)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:269)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:269)
	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:273)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:269)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:269)
	at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1388)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.RDD.take(RDD.scala:1382)
	at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1423)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.RDD.first(RDD.scala:1422)
	at org.apache.spark.ml.util.DefaultParamsReader$.loadMetadata(ReadWrite.scala:615)
	at org.apache.spark.ml.Pipeline$SharedReadWrite$.load(Pipeline.scala:267)
	at org.apache.spark.ml.PipelineModel$PipelineModelReader.load(Pipeline.scala:348)
	at org.apache.spark.ml.PipelineModel$PipelineModelReader.load(Pipeline.scala:342)
	at com.johnsnowlabs.nlp.pretrained.ResourceDownloader$.downloadPipeline(ResourceDownloader.scala:376)
	at com.johnsnowlabs.nlp.pretrained.ResourceDownloader$.downloadPipeline(ResourceDownloader.scala:370)
	at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader$.downloadPipeline(ResourceDownloader.scala:470)
	at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.downloadPipeline(ResourceDownloader.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)