1818package org .ohdsi .whiteRabbit .scan ;
1919
2020import java .io .*;
21+ import java .nio .file .Files ;
22+ import java .nio .file .Path ;
23+ import java .nio .file .Paths ;
24+ import java .rmi .RemoteException ;
2125import java .sql .ResultSet ;
2226import java .sql .SQLException ;
2327import java .time .LocalDate ;
2933import com .epam .parso .SasFileProperties ;
3034import com .epam .parso .SasFileReader ;
3135import com .epam .parso .impl .SasFileReaderImpl ;
36+ import org .apache .commons .lang .StringUtils ;
3237import org .apache .poi .ss .usermodel .Cell ;
3338import org .apache .poi .ss .usermodel .CellStyle ;
3439import org .apache .poi .ss .usermodel .Row ;
3540import org .apache .poi .ss .usermodel .Sheet ;
3641import org .apache .poi .xssf .streaming .SXSSFWorkbook ;
42+ import org .apache .commons .io .FileUtils ;
3743import org .ohdsi .databases .DbType ;
3844import org .ohdsi .databases .RichConnection ;
3945import org .ohdsi .databases .RichConnection .QueryResult ;
4955
5056public class SourceDataScan {
5157
52- public static int MAX_VALUES_IN_MEMORY = 100000 ;
53- public static int MIN_CELL_COUNT_FOR_CSV = 1000000 ;
54- public static int N_FOR_FREE_TEXT_CHECK = 1000 ;
55- public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100 ;
58+ public static int MAX_VALUES_IN_MEMORY = 100000 ;
59+ public static int MIN_CELL_COUNT_FOR_CSV = 1000000 ;
60+ public static int N_FOR_FREE_TEXT_CHECK = 1000 ;
61+ public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100 ;
62+
63+ public final static String SCAN_REPORT_FILE_NAME = "ScanReport.xlsx" ;
64+
65+ public static final String POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME = "ORG_OHDSI_WHITERABBIT_POI_TMPDIR" ;
66+ public static final String POI_TMP_DIR_PROPERTY_NAME = "org.ohdsi.whiterabbit.poi.tmpdir" ;
5667
5768 private SXSSFWorkbook workbook ;
5869 private char delimiter = ',' ;
@@ -70,6 +81,15 @@ public class SourceDataScan {
7081
7182 private LocalDateTime startTimeStamp ;
7283
// Absolute path of the directory selected for Apache POI tmp files; resolved once, when this class is initialized.
static final String poiTmpPath;

static {
    String resolvedPoiTmpPath;
    try {
        resolvedPoiTmpPath = setUniqueTempDirStrategyForApachePoi();
    } catch (IOException initFailure) {
        // a static initializer cannot propagate a checked exception, so it is wrapped
        throw new RuntimeException(initFailure);
    }
    poiTmpPath = resolvedPoiTmpPath;
}
7393
7494 public void setSampleSize (int sampleSize ) {
7595 // -1 if sample size is not restricted
@@ -117,6 +137,78 @@ public void process(DbSettings dbSettings, String outputFileName) {
117137 generateReport (outputFileName );
118138 }
119139
140+ /*
141+ * Implements a strategy for the tmp dir to ise for files for apache poi
142+ * Attempts to solve an issue where some users report not having write access to the poi tmp dir
143+ * (see https://github.com/OHDSI/WhiteRabbit/issues/293). Vry likely this is caused by the poi tmp dir
144+ * being created on a multi-user system by a user with a too restrictive file mask.
145+ */
146+ public static String setUniqueTempDirStrategyForApachePoi () throws IOException {
147+ Path myTmpDir = getDefaultPoiTmpPath (FileUtils .getTempDirectory ().toPath ());
148+ String userConfiguredPoiTmpDir = getUserConfiguredPoiTmpDir ();
149+ if (!StringUtils .isEmpty (userConfiguredPoiTmpDir )) {
150+ myTmpDir = setupTmpDir (Paths .get (userConfiguredPoiTmpDir ));
151+ } else {
152+ if (isNotWritable (myTmpDir )) {
153+ // avoid the poi files directory entirely by creating a separate directory in the standard tmp dir
154+ myTmpDir = setupTmpDir (FileUtils .getTempDirectory ().toPath ());
155+ }
156+ }
157+
158+ String tmpDir = myTmpDir .toFile ().getAbsolutePath ();
159+ checkWritableTmpDir (tmpDir );
160+ return tmpDir ;
161+ }
162+
163+ public static Path getDefaultPoiTmpPath (Path tmpRoot ) {
164+ // TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal
165+ final String poiFilesDir = "poifiles" ; // copied from poi implementation 4.x
166+ return tmpRoot .resolve (poiFilesDir );
167+ }
168+
169+ private static Path setupTmpDir (Path tmpDir ) {
170+ checkWritableTmpDir (tmpDir .toFile ().getAbsolutePath ());
171+ Path myTmpDir = Paths .get (tmpDir .toFile ().getAbsolutePath (), UUID .randomUUID ().toString ());
172+ try {
173+ Files .createDirectory (myTmpDir );
174+ org .apache .poi .util .TempFile .setTempFileCreationStrategy (new org .apache .poi .util .DefaultTempFileCreationStrategy (myTmpDir .toFile ()));
175+ } catch (IOException ioException ) {
176+ throw new RuntimeException (String .format ("Exception while creating directory %s" , myTmpDir ), ioException );
177+ }
178+ return myTmpDir ;
179+ }
180+
181+ private static void checkWritableTmpDir (String dir ) {
182+ if (isNotWritable (Paths .get (dir ))) {
183+ String message = String .format ("Directory %s is not writable! (used for tmp files for Apache POI)" , dir );
184+ System .out .println (message );
185+ throw new RuntimeException (message );
186+ }
187+ }
188+
189+ private static String getUserConfiguredPoiTmpDir () {
190+ // search for a user configured dir for poi tmp files. Env.var. overrules Java property.
191+ String userConfiguredDir = System .getenv (POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME );
192+ if (StringUtils .isEmpty (userConfiguredDir )) {
193+ userConfiguredDir = System .getProperty (POI_TMP_DIR_PROPERTY_NAME );
194+ }
195+ return userConfiguredDir ;
196+ }
197+
198+ public static boolean isNotWritable (Path path ) {
199+ final Path testFile = path .resolve ("test.txt" );
200+ if (Files .exists (path ) && Files .isDirectory (path )) {
201+ try {
202+ Files .createFile (testFile );
203+ Files .delete (testFile );
204+ } catch (IOException e ) {
205+ return true ;
206+ }
207+ return false ;
208+ }
209+ return true ;
210+ }
211+
120212 private void processDatabase (DbSettings dbSettings ) {
121213 // GBQ requires database. Put database value into domain var
122214 if (dbSettings .dbType == DbType .BIGQUERY ) {
@@ -486,11 +578,11 @@ else if (dbType == DbType.MSSQL || dbType == DbType.PDW) {
486578 trimmedDatabase = database .substring (1 , database .length () - 1 );
487579 String [] parts = table .split ("\\ ." );
488580 query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_CATALOG='" + trimmedDatabase + "' AND TABLE_SCHEMA='" + parts [0 ] +
489- "' AND TABLE_NAME='" + parts [1 ] + "';" ;
581+ "' AND TABLE_NAME='" + parts [1 ] + "';" ;
490582 } else if (dbType == DbType .AZURE ) {
491583 String [] parts = table .split ("\\ ." );
492584 query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='" + parts [0 ] +
493- "' AND TABLE_NAME='" + parts [1 ] + "';" ;
585+ "' AND TABLE_NAME='" + parts [1 ] + "';" ;
494586 } else if (dbType == DbType .MYSQL )
495587 query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database + "' AND TABLE_NAME = '" + table
496588 + "';" ;
@@ -500,8 +592,7 @@ else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT)
500592 else if (dbType == DbType .TERADATA ) {
501593 query = "SELECT ColumnName, ColumnType FROM dbc.columns WHERE DatabaseName= '" + database .toLowerCase () + "' AND TableName = '"
502594 + table .toLowerCase () + "';" ;
503- }
504- else if (dbType == DbType .BIGQUERY ) {
595+ } else if (dbType == DbType .BIGQUERY ) {
505596 query = "SELECT column_name AS COLUMN_NAME, data_type as DATA_TYPE FROM " + database + ".INFORMATION_SCHEMA.COLUMNS WHERE table_name = \" " + table + "\" ;" ;
506597 }
507598
@@ -735,7 +826,6 @@ public void processValue(String value) {
735826 samplingReservoir .add (DateUtilities .parseDate (trimValue ));
736827 }
737828 }
738-
739829 }
740830
741831 public List <Pair <String , Integer >> getSortedValuesWithoutSmallValues () {
0 commit comments