Skip to content

Commit 490350e

Browse files
Merge branch 'master' into develop
2 parents 2282c2a + c7073da commit 490350e

File tree

8 files changed

+364
-38
lines changed

8 files changed

+364
-38
lines changed

docs/WhiteRabbit.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,23 @@ To increase the memory (in this example to 2400m), either set the environment va
4545
To lower the memory, set one of these variables to e.g. `-Xmx600m`.
4646
If you have a 32-bit Java VM installed and problems persist, consider installing 64-bit Java.
4747

48+
### Temporary Directory for Apache POI
49+
(This addresses [issue 293](https://github.com/OHDSI/WhiteRabbit/issues/293))
50+
51+
The Apache POI library is used for generating the scan report in Excel format. This library creates its own directory for
52+
temporary files in the system temporary directory. In [issue 293](https://github.com/OHDSI/WhiteRabbit/issues/293) it has
53+
been reported that this can cause problems in a multi-user environment, when multiple user attempt to create this directory
54+
with too restrictive permissions (read-only for other users).
55+
WhiteRabbit from version 0.10.9 attempts to circumvent this automatically, but this workaround can fail due
56+
to concurrency problems. If you want to prevent this from happening entirely , you can set either the environment variable
57+
```ORG_OHDSI_WHITERABBIT_POI_TMPDIR``` or the Java system property ```org.ohdsi.whiterabbit.poi.tmpdir``` to a
58+
temporary directory of your choice when starting WhiteRabbit (best would be to add this to the ```whiteRabbit``` or
59+
```whiteRabbit.bat``` script). Please note that this directory should exist before your start WhiteRabbit,
60+
and that it should be writable by any user that may want to run WhiteRabbit.
61+
For each user a separate subdirectory will be created, so that permission related conflicts should be avoided.
62+
Also, WhiteRabbit now attempts to detect this situation before the scan starts. If this is detected,
63+
the scan is not started, and the problem identified before the scan, instead of afterwards.
64+
4865
## Support
4966
All source code, descriptions and input/output examples are available on GitHub: <https://github.com/OHDSI/WhiteRabbit>
5067

rabbit-core/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
<properties>
1616
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
17+
<apache-poi-version>4.1.2</apache-poi-version>
1718
</properties>
1819

1920
<dependencies>
@@ -74,7 +75,7 @@
7475
<dependency>
7576
<groupId>org.apache.poi</groupId>
7677
<artifactId>poi-ooxml-schemas</artifactId>
77-
<version>4.1.2</version>
78+
<version>4.1.2</version>
7879
</dependency>
7980
<!-- Note: Apache xmlbeans v4.x and v5.x is incompatible with Apache poi v4-->
8081
<dependency>
@@ -233,6 +234,5 @@
233234
<artifactId>ant</artifactId>
234235
<version>1.10.13</version>
235236
</dependency>
236-
237237
</dependencies>
238238
</project>

whiterabbit/pom.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@
6666
<type>pom</type>
6767
<scope>import</scope>
6868
</dependency>
69+
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
70+
<dependency>
71+
<groupId>commons-io</groupId>
72+
<artifactId>commons-io</artifactId>
73+
<version>2.11.0</version>
74+
</dependency>
6975
</dependencies>
7076
</dependencyManagement>
7177

@@ -80,6 +86,13 @@
8086
<artifactId>slf4j-simple</artifactId>
8187
<version>1.7.30</version>
8288
</dependency>
89+
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
90+
<dependency>
91+
<groupId>commons-io</groupId>
92+
<artifactId>commons-io</artifactId>
93+
<version>2.11.0</version>
94+
</dependency>
95+
8396

8497
<!-- https://mvnrepository.com/artifact/org.testcontainers/testcontainers -->
8598
<dependency>

whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("BigQuery")) {
265265
sourceDataScan.setMaxValues(maxValues);
266266
sourceDataScan.setCalculateNumericStats(calculateNumericStats);
267267
sourceDataScan.setNumStatsSamplerSize(numericStatsSamplerSize);
268-
sourceDataScan.process(dbSettings, iniFile.get("WORKING_FOLDER") + "/ScanReport.xlsx");
268+
sourceDataScan.process(dbSettings, iniFile.get("WORKING_FOLDER") + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME);
269269
}
270270

271271
private JComponent createTabsPanel() {
@@ -543,7 +543,7 @@ private JPanel createFakeDataPanel() {
543543
folderPanel.setLayout(new BoxLayout(folderPanel, BoxLayout.X_AXIS));
544544
folderPanel.setBorder(BorderFactory.createTitledBorder("Scan report file"));
545545
scanReportFileField = new JTextField();
546-
scanReportFileField.setText((new File("ScanReport.xlsx").getAbsolutePath()));
546+
scanReportFileField.setText((new File(SourceDataScan.SCAN_REPORT_FILE_NAME).getAbsolutePath()));
547547
scanReportFileField.setToolTipText("The path to the scan report that will be used as a template to generate the fake data");
548548
folderPanel.add(scanReportFileField);
549549
JButton pickButton = new JButton("Pick file");
@@ -1051,7 +1051,7 @@ public void run() {
10511051
table = folderField.getText() + "/" + table;
10521052
dbSettings.tables.add(table);
10531053
}
1054-
sourceDataScan.process(dbSettings, folderField.getText() + "/ScanReport.xlsx");
1054+
sourceDataScan.process(dbSettings, folderField.getText() + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME);
10551055
}
10561056
} catch (Exception e) {
10571057
handleError(e);

whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java

Lines changed: 99 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
package org.ohdsi.whiteRabbit.scan;
1919

2020
import java.io.*;
21+
import java.nio.file.Files;
22+
import java.nio.file.Path;
23+
import java.nio.file.Paths;
24+
import java.rmi.RemoteException;
2125
import java.sql.ResultSet;
2226
import java.sql.SQLException;
2327
import java.time.LocalDate;
@@ -29,11 +33,13 @@
2933
import com.epam.parso.SasFileProperties;
3034
import com.epam.parso.SasFileReader;
3135
import com.epam.parso.impl.SasFileReaderImpl;
36+
import org.apache.commons.lang.StringUtils;
3237
import org.apache.poi.ss.usermodel.Cell;
3338
import org.apache.poi.ss.usermodel.CellStyle;
3439
import org.apache.poi.ss.usermodel.Row;
3540
import org.apache.poi.ss.usermodel.Sheet;
3641
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
42+
import org.apache.commons.io.FileUtils;
3743
import org.ohdsi.databases.DbType;
3844
import org.ohdsi.databases.RichConnection;
3945
import org.ohdsi.databases.RichConnection.QueryResult;
@@ -49,10 +55,15 @@
4955

5056
public class SourceDataScan {
5157

52-
public static int MAX_VALUES_IN_MEMORY = 100000;
53-
public static int MIN_CELL_COUNT_FOR_CSV = 1000000;
54-
public static int N_FOR_FREE_TEXT_CHECK = 1000;
55-
public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100;
58+
public static int MAX_VALUES_IN_MEMORY = 100000;
59+
public static int MIN_CELL_COUNT_FOR_CSV = 1000000;
60+
public static int N_FOR_FREE_TEXT_CHECK = 1000;
61+
public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100;
62+
63+
public final static String SCAN_REPORT_FILE_NAME = "ScanReport.xlsx";
64+
65+
public static final String POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME = "ORG_OHDSI_WHITERABBIT_POI_TMPDIR";
66+
public static final String POI_TMP_DIR_PROPERTY_NAME = "org.ohdsi.whiterabbit.poi.tmpdir";
5667

5768
private SXSSFWorkbook workbook;
5869
private char delimiter = ',';
@@ -70,6 +81,15 @@ public class SourceDataScan {
7081

7182
private LocalDateTime startTimeStamp;
7283

84+
static final String poiTmpPath;
85+
86+
static {
87+
try {
88+
poiTmpPath = setUniqueTempDirStrategyForApachePoi();
89+
} catch (IOException e) {
90+
throw new RuntimeException(e);
91+
}
92+
}
7393

7494
public void setSampleSize(int sampleSize) {
7595
// -1 if sample size is not restricted
@@ -117,6 +137,78 @@ public void process(DbSettings dbSettings, String outputFileName) {
117137
generateReport(outputFileName);
118138
}
119139

140+
/*
141+
* Implements a strategy for the tmp dir to ise for files for apache poi
142+
* Attempts to solve an issue where some users report not having write access to the poi tmp dir
143+
* (see https://github.com/OHDSI/WhiteRabbit/issues/293). Vry likely this is caused by the poi tmp dir
144+
* being created on a multi-user system by a user with a too restrictive file mask.
145+
*/
146+
public static String setUniqueTempDirStrategyForApachePoi() throws IOException {
147+
Path myTmpDir = getDefaultPoiTmpPath(FileUtils.getTempDirectory().toPath());
148+
String userConfiguredPoiTmpDir = getUserConfiguredPoiTmpDir();
149+
if (!StringUtils.isEmpty(userConfiguredPoiTmpDir)) {
150+
myTmpDir = setupTmpDir(Paths.get(userConfiguredPoiTmpDir));
151+
} else {
152+
if (isNotWritable(myTmpDir)) {
153+
// avoid the poi files directory entirely by creating a separate directory in the standard tmp dir
154+
myTmpDir = setupTmpDir(FileUtils.getTempDirectory().toPath());
155+
}
156+
}
157+
158+
String tmpDir = myTmpDir.toFile().getAbsolutePath();
159+
checkWritableTmpDir(tmpDir);
160+
return tmpDir;
161+
}
162+
163+
public static Path getDefaultPoiTmpPath(Path tmpRoot) {
164+
// TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal
165+
final String poiFilesDir = "poifiles"; // copied from poi implementation 4.x
166+
return tmpRoot.resolve(poiFilesDir);
167+
}
168+
169+
private static Path setupTmpDir(Path tmpDir) {
170+
checkWritableTmpDir(tmpDir.toFile().getAbsolutePath());
171+
Path myTmpDir = Paths.get(tmpDir.toFile().getAbsolutePath(), UUID.randomUUID().toString());
172+
try {
173+
Files.createDirectory(myTmpDir);
174+
org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(myTmpDir.toFile()));
175+
} catch (IOException ioException) {
176+
throw new RuntimeException(String.format("Exception while creating directory %s", myTmpDir), ioException);
177+
}
178+
return myTmpDir;
179+
}
180+
181+
private static void checkWritableTmpDir(String dir) {
182+
if (isNotWritable(Paths.get(dir))) {
183+
String message = String.format("Directory %s is not writable! (used for tmp files for Apache POI)", dir);
184+
System.out.println(message);
185+
throw new RuntimeException(message);
186+
}
187+
}
188+
189+
private static String getUserConfiguredPoiTmpDir() {
190+
// search for a user configured dir for poi tmp files. Env.var. overrules Java property.
191+
String userConfiguredDir = System.getenv(POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME);
192+
if (StringUtils.isEmpty(userConfiguredDir)) {
193+
userConfiguredDir = System.getProperty(POI_TMP_DIR_PROPERTY_NAME);
194+
}
195+
return userConfiguredDir;
196+
}
197+
198+
public static boolean isNotWritable(Path path) {
199+
final Path testFile = path.resolve("test.txt");
200+
if (Files.exists(path) && Files.isDirectory(path)) {
201+
try {
202+
Files.createFile(testFile);
203+
Files.delete(testFile);
204+
} catch (IOException e) {
205+
return true;
206+
}
207+
return false;
208+
}
209+
return true;
210+
}
211+
120212
private void processDatabase(DbSettings dbSettings) {
121213
// GBQ requires database. Put database value into domain var
122214
if (dbSettings.dbType == DbType.BIGQUERY) {
@@ -486,11 +578,11 @@ else if (dbType == DbType.MSSQL || dbType == DbType.PDW) {
486578
trimmedDatabase = database.substring(1, database.length() - 1);
487579
String[] parts = table.split("\\.");
488580
query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_CATALOG='" + trimmedDatabase + "' AND TABLE_SCHEMA='" + parts[0] +
489-
"' AND TABLE_NAME='" + parts[1] + "';";
581+
"' AND TABLE_NAME='" + parts[1] + "';";
490582
} else if (dbType == DbType.AZURE) {
491583
String[] parts = table.split("\\.");
492584
query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='" + parts[0] +
493-
"' AND TABLE_NAME='" + parts[1] + "';";
585+
"' AND TABLE_NAME='" + parts[1] + "';";
494586
} else if (dbType == DbType.MYSQL)
495587
query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database + "' AND TABLE_NAME = '" + table
496588
+ "';";
@@ -500,8 +592,7 @@ else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT)
500592
else if (dbType == DbType.TERADATA) {
501593
query = "SELECT ColumnName, ColumnType FROM dbc.columns WHERE DatabaseName= '" + database.toLowerCase() + "' AND TableName = '"
502594
+ table.toLowerCase() + "';";
503-
}
504-
else if (dbType == DbType.BIGQUERY) {
595+
} else if (dbType == DbType.BIGQUERY) {
505596
query = "SELECT column_name AS COLUMN_NAME, data_type as DATA_TYPE FROM " + database + ".INFORMATION_SCHEMA.COLUMNS WHERE table_name = \"" + table + "\";";
506597
}
507598

@@ -735,7 +826,6 @@ public void processValue(String value) {
735826
samplingReservoir.add(DateUtilities.parseDate(trimValue));
736827
}
737828
}
738-
739829
}
740830

741831
public List<Pair<String, Integer>> getSortedValuesWithoutSmallValues() {

whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package org.ohdsi.whiterabbit.scan;
22

33
import org.ohdsi.databases.DbType;
4+
import org.ohdsi.databases.RichConnection;
45
import org.ohdsi.ooxml.ReadXlsxFileWithHeader;
56
import org.ohdsi.utilities.files.Row;
67
import org.ohdsi.utilities.files.RowUtilities;
8+
import org.ohdsi.whiteRabbit.DbSettings;
9+
import org.testcontainers.containers.PostgreSQLContainer;
710

811
import java.io.File;
912
import java.io.FileInputStream;
@@ -98,4 +101,27 @@ else if (dbType == DbType.ORACLE){
98101
throw new RuntimeException("Unsupported DBType: " + dbType);
99102
}
100103
}
104+
105+
static DbSettings getTestPostgreSQLSettings(PostgreSQLContainer<?> container) {
106+
DbSettings dbSettings = new DbSettings();
107+
dbSettings.dbType = DbType.POSTGRESQL;
108+
dbSettings.sourceType = DbSettings.SourceType.DATABASE;
109+
dbSettings.server = container.getJdbcUrl();
110+
dbSettings.database = "public"; // yes, really
111+
dbSettings.user = container.getUsername();
112+
dbSettings.password = container.getPassword();
113+
dbSettings.tables = getTableNamesPostgreSQL(dbSettings);
114+
115+
return dbSettings;
116+
}
117+
118+
static List<String> getTableNamesPostgreSQL(DbSettings dbSettings) {
119+
try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
120+
return richConnection.getTableNames("public");
121+
}
122+
}
123+
124+
125+
126+
101127
}

0 commit comments

Comments
 (0)