Skip to content

Commit

Permalink
minor change
Browse files Browse the repository at this point in the history
  • Loading branch information
enriquea committed Jan 11, 2024
1 parent cd1208f commit ebbb3bc
Show file tree
Hide file tree
Showing 9 changed files with 17 additions and 17 deletions.
10 changes: 5 additions & 5 deletions docs/README.data.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,20 @@ contains the response variable.
#### How to create a Feature Selection Spark Data Frame (FSDF)

```python
from fsspark.context import init_spark, stop_spark_session
from fsspark.config.context import init_spark, stop_spark_session
from fsspark.fs.core import FSDataFrame
from fsspark.utils.io import import_table_as_psdf

# Init spark
init_spark()

# Import data
psdf = import_table_as_psdf('data.tsv.bgz',
sep='\t',
psdf = import_table_as_psdf('data.tsv.bgz',
sep='\t',
n_partitions=5)
# Create FSDataFrame
fsdf = FSDataFrame(psdf,
sample_col='sample_id',
fsdf = FSDataFrame(psdf,
sample_col='sample_id',
label_col='response')
# Stop spark
stop_spark_session()
Expand Down
2 changes: 1 addition & 1 deletion docs/README.methods.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ multivariate correlation filter and Random Forest classification.
"""

from fsspark.context import init_spark, stop_spark_session
from fsspark.config.context import init_spark, stop_spark_session
from fsspark.fs.core import FSDataFrame
from fsspark.fs.ml import cv_rf_classification, get_accuracy, get_predictions
from fsspark.fs.multivariate import multivariate_filter
Expand Down
Empty file added fsspark/config/__init__.py
Empty file.
6 changes: 3 additions & 3 deletions fsspark/context.py → fsspark/config/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import pyspark
from pyspark.sql import SparkSession

from fsspark.global_settings import (SPARK_EXTRA_SETTINGS,
PYARROW_SETTINGS,
PANDAS_ON_SPARK_API_SETTINGS)
from fsspark.config.global_settings import (SPARK_EXTRA_SETTINGS,
PYARROW_SETTINGS,
PANDAS_ON_SPARK_API_SETTINGS)


os.environ['PYARROW_IGNORE_TIMEZONE'] = "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

# spark settings to test this module locally.
SPARK_EXTRA_SETTINGS = {'spark.executor.memory': '8g',
'spark.driver.memory': '20g',
'spark.driver.memory': '16g',
"spark.memory.offHeap.enabled": 'true',
"spark.memory.offHeap.size": '2g',
"spark.sql.pivotMaxValues": '60000',
"spark.memory.offHeap.size": '4g',
"spark.sql.pivotMaxValues": '100000',
"spark.network.timeout": '100000',
"spark.sql.session.timeZone": "UTC"
}
Expand Down
2 changes: 1 addition & 1 deletion fsspark/pipeline/fs_corr_rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""

from fsspark.context import init_spark, stop_spark_session
from fsspark.config.context import init_spark, stop_spark_session
from fsspark.fs.core import FSDataFrame
from fsspark.fs.ml import cv_rf_classification, get_accuracy, get_predictions, get_feature_scores
from fsspark.fs.multivariate import multivariate_filter
Expand Down
4 changes: 2 additions & 2 deletions fsspark/tests/test_FSDataFrame.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import unittest

from fsspark.context import init_spark, stop_spark_session
from fsspark.config.context import init_spark, stop_spark_session
from fsspark.fs.core import FSDataFrame
from fsspark.utils.datasets import get_tnbc_data_path
from fsspark.utils.io import import_table, import_table_as_psdf
from fsspark.utils.io import import_table_as_psdf


class FSDataFrameTest(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion fsspark/tests/test_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pyspark
import pyspark.pandas as ps

from fsspark.context import init_spark, stop_spark_session
from fsspark.config.context import init_spark, stop_spark_session
from fsspark.utils.datasets import get_tnbc_data_path
from fsspark.utils.io import import_table, import_table_as_psdf

Expand Down
2 changes: 1 addition & 1 deletion fsspark/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pyspark.pandas
import pyspark.sql

from fsspark.context import PANDAS_ON_SPARK_API_SETTINGS
from fsspark.config.context import PANDAS_ON_SPARK_API_SETTINGS

warnings.filterwarnings("ignore")

Expand Down

0 comments on commit ebbb3bc

Please sign in to comment.