hana-ml
2.22.241011
  • Python Machine Learning Client for SAP HANA
    • Prerequisites
    • SAP HANA DataFrame
    • Machine Learning API
    • End-to-End Example: Using SAP HANA Predictive Analysis Library (PAL) Module
    • End-to-End Example: Using SAP HANA Automated Predictive Library (APL) Module
    • Visualizers Module
    • Spatial and Graph Features
    • Summary
  • Installation Guide
  • hana-ml Tutorials
  • Changelog
  • hana_ml.dataframe
    • quotename()
    • ConnectionContext
      • ConnectionContext.enable_abap_sql()
      • ConnectionContext.disable_abap_sql()
      • ConnectionContext.close()
      • ConnectionContext.add_primary_key()
      • ConnectionContext.copy()
      • ConnectionContext.create_schema()
      • ConnectionContext.create_table()
      • ConnectionContext.create_virtual_table()
      • ConnectionContext.drop_procedure()
      • ConnectionContext.drop_view()
      • ConnectionContext.drop_table()
      • ConnectionContext.copy_to_data_lake()
      • ConnectionContext.explain_plan_statement()
      • ConnectionContext.has_schema()
      • ConnectionContext.has_table()
      • ConnectionContext.hana_version()
      • ConnectionContext.get_current_schema()
      • ConnectionContext.get_tables()
      • ConnectionContext.get_procedures()
      • ConnectionContext.get_temporary_tables()
      • ConnectionContext.get_connection_id()
      • ConnectionContext.cancel_session_process()
      • ConnectionContext.restart_session()
      • ConnectionContext.clean_up_temporary_tables()
      • ConnectionContext.hana_major_version()
      • ConnectionContext.is_cloud_version()
      • ConnectionContext.sql()
      • ConnectionContext.execute_sql()
      • ConnectionContext.table()
      • ConnectionContext.upsert_streams_data()
      • ConnectionContext.update_streams_data()
      • ConnectionContext.to_sqlalchemy()
    • DataFrame
      • DataFrame.columns
      • DataFrame.shape
      • DataFrame.name
      • DataFrame.quoted_name
      • DataFrame.description
      • DataFrame.description_ext
      • DataFrame.declare_lttab_usage()
      • DataFrame.disable_validate_columns()
      • DataFrame.enable_validate_columns()
      • DataFrame.add_id()
      • DataFrame.add_constant()
      • DataFrame.alias()
      • DataFrame.count()
      • DataFrame.diff()
      • DataFrame.drop()
      • DataFrame.distinct()
      • DataFrame.drop_duplicates()
      • DataFrame.dropna()
      • DataFrame.deselect()
      • DataFrame.has_constant_columns()
      • DataFrame.drop_constant_columns()
      • DataFrame.dtypes()
      • DataFrame.empty()
      • DataFrame.filter()
      • DataFrame.has()
      • DataFrame.head()
      • DataFrame.hasna()
      • DataFrame.fillna()
      • DataFrame.get_table_structure()
      • DataFrame.join()
      • DataFrame.set_name()
      • DataFrame.set_index()
      • DataFrame.save()
      • DataFrame.save_nativedisktable()
      • DataFrame.split_column()
      • DataFrame.concat_columns()
      • DataFrame.nullif()
      • DataFrame.replace()
      • DataFrame.sort()
      • DataFrame.sort_values()
      • DataFrame.sort_index()
      • DataFrame.select()
      • DataFrame.set_operations()
      • DataFrame.union()
      • DataFrame.collect()
      • DataFrame.geometries
      • DataFrame.srids
      • DataFrame.rename_columns()
      • DataFrame.auto_cast()
      • DataFrame.cast()
      • DataFrame.tail()
      • DataFrame.to_head()
      • DataFrame.to_tail()
      • DataFrame.summary()
      • DataFrame.stats
      • DataFrame.describe()
      • DataFrame.bin()
      • DataFrame.agg()
      • DataFrame.is_numeric()
      • DataFrame.corr()
      • DataFrame.min()
      • DataFrame.max()
      • DataFrame.sum()
      • DataFrame.median()
      • DataFrame.mean()
      • DataFrame.stddev()
      • DataFrame.value_counts()
      • DataFrame.pivot_table()
      • DataFrame.generate_table_type()
      • DataFrame.rearrange()
      • DataFrame.set_source_table()
      • DataFrame.to_pickle()
      • DataFrame.to_datetime()
      • DataFrame.generate_feature()
      • DataFrame.mutate()
    • read_pickle()
    • create_dataframe_from_pandas()
    • create_dataframe_from_spark()
    • melt()
    • create_dataframe_from_shapefile()
    • import_csv_from()
  • hana_ml.algorithms.apl package
    • hana_ml.algorithms.apl.gradient_boosting_classification
      • GradientBoostingClassifier
        • GradientBoostingClassifier.set_params()
        • GradientBoostingClassifier.fit()
        • GradientBoostingClassifier.score()
        • GradientBoostingClassifier.get_metrics_per_class()
        • GradientBoostingClassifier.build_report()
        • GradientBoostingClassifier.set_metric_samplings()
        • GradientBoostingClassifier.disable_hana_execution()
        • GradientBoostingClassifier.enable_hana_execution()
        • GradientBoostingClassifier.export_apply_code()
        • GradientBoostingClassifier.generate_html_report()
        • GradientBoostingClassifier.generate_notebook_iframe_report()
        • GradientBoostingClassifier.get_apl_version()
        • GradientBoostingClassifier.get_artifacts_recorder()
        • GradientBoostingClassifier.get_best_iteration()
        • GradientBoostingClassifier.get_debrief_report()
        • GradientBoostingClassifier.get_evalmetrics()
        • GradientBoostingClassifier.get_feature_importances()
        • GradientBoostingClassifier.get_fit_operation_log()
        • GradientBoostingClassifier.get_indicators()
        • GradientBoostingClassifier.get_model_info()
        • GradientBoostingClassifier.get_params()
        • GradientBoostingClassifier.get_performance_metrics()
        • GradientBoostingClassifier.get_predict_operation_log()
        • GradientBoostingClassifier.get_summary()
        • GradientBoostingClassifier.is_fitted()
        • GradientBoostingClassifier.load_model()
        • GradientBoostingClassifier.predict()
        • GradientBoostingClassifier.save_artifact()
        • GradientBoostingClassifier.save_model()
        • GradientBoostingClassifier.schedule_fit()
        • GradientBoostingClassifier.schedule_predict()
        • GradientBoostingClassifier.set_framework_version()
        • GradientBoostingClassifier.set_scale_out()
        • GradientBoostingClassifier.set_shapley_explainer_of_predict_phase()
        • GradientBoostingClassifier.set_shapley_explainer_of_score_phase()
      • GradientBoostingBinaryClassifier
        • GradientBoostingBinaryClassifier.set_params()
        • GradientBoostingBinaryClassifier.score()
        • GradientBoostingBinaryClassifier.build_report()
        • GradientBoostingBinaryClassifier.disable_hana_execution()
        • GradientBoostingBinaryClassifier.enable_hana_execution()
        • GradientBoostingBinaryClassifier.export_apply_code()
        • GradientBoostingBinaryClassifier.fit()
        • GradientBoostingBinaryClassifier.generate_html_report()
        • GradientBoostingBinaryClassifier.generate_notebook_iframe_report()
        • GradientBoostingBinaryClassifier.get_apl_version()
        • GradientBoostingBinaryClassifier.get_artifacts_recorder()
        • GradientBoostingBinaryClassifier.get_best_iteration()
        • GradientBoostingBinaryClassifier.get_debrief_report()
        • GradientBoostingBinaryClassifier.get_evalmetrics()
        • GradientBoostingBinaryClassifier.get_feature_importances()
        • GradientBoostingBinaryClassifier.get_fit_operation_log()
        • GradientBoostingBinaryClassifier.get_indicators()
        • GradientBoostingBinaryClassifier.get_model_info()
        • GradientBoostingBinaryClassifier.get_params()
        • GradientBoostingBinaryClassifier.get_performance_metrics()
        • GradientBoostingBinaryClassifier.get_predict_operation_log()
        • GradientBoostingBinaryClassifier.get_summary()
        • GradientBoostingBinaryClassifier.is_fitted()
        • GradientBoostingBinaryClassifier.load_model()
        • GradientBoostingBinaryClassifier.predict()
        • GradientBoostingBinaryClassifier.save_artifact()
        • GradientBoostingBinaryClassifier.save_model()
        • GradientBoostingBinaryClassifier.schedule_fit()
        • GradientBoostingBinaryClassifier.schedule_predict()
        • GradientBoostingBinaryClassifier.set_framework_version()
        • GradientBoostingBinaryClassifier.set_metric_samplings()
        • GradientBoostingBinaryClassifier.set_scale_out()
        • GradientBoostingBinaryClassifier.set_shapley_explainer_of_predict_phase()
        • GradientBoostingBinaryClassifier.set_shapley_explainer_of_score_phase()
    • hana_ml.algorithms.apl.gradient_boosting_regression
      • GradientBoostingRegressor
        • GradientBoostingRegressor.set_params()
        • GradientBoostingRegressor.predict()
        • GradientBoostingRegressor.score()
        • GradientBoostingRegressor.build_report()
        • GradientBoostingRegressor.disable_hana_execution()
        • GradientBoostingRegressor.enable_hana_execution()
        • GradientBoostingRegressor.export_apply_code()
        • GradientBoostingRegressor.fit()
        • GradientBoostingRegressor.generate_html_report()
        • GradientBoostingRegressor.generate_notebook_iframe_report()
        • GradientBoostingRegressor.get_apl_version()
        • GradientBoostingRegressor.get_artifacts_recorder()
        • GradientBoostingRegressor.get_best_iteration()
        • GradientBoostingRegressor.get_debrief_report()
        • GradientBoostingRegressor.get_evalmetrics()
        • GradientBoostingRegressor.get_feature_importances()
        • GradientBoostingRegressor.get_fit_operation_log()
        • GradientBoostingRegressor.get_indicators()
        • GradientBoostingRegressor.get_model_info()
        • GradientBoostingRegressor.get_params()
        • GradientBoostingRegressor.get_performance_metrics()
        • GradientBoostingRegressor.get_predict_operation_log()
        • GradientBoostingRegressor.get_summary()
        • GradientBoostingRegressor.is_fitted()
        • GradientBoostingRegressor.load_model()
        • GradientBoostingRegressor.save_artifact()
        • GradientBoostingRegressor.save_model()
        • GradientBoostingRegressor.schedule_fit()
        • GradientBoostingRegressor.schedule_predict()
        • GradientBoostingRegressor.set_framework_version()
        • GradientBoostingRegressor.set_scale_out()
        • GradientBoostingRegressor.set_shapley_explainer_of_predict_phase()
        • GradientBoostingRegressor.set_shapley_explainer_of_score_phase()
    • hana_ml.algorithms.apl.time_series
      • AutoTimeSeries
        • AutoTimeSeries.set_params()
        • AutoTimeSeries.fit()
        • AutoTimeSeries.predict()
        • AutoTimeSeries.fit_predict()
        • AutoTimeSeries.forecast()
        • AutoTimeSeries.get_model_components()
        • AutoTimeSeries.get_performance_metrics()
        • AutoTimeSeries.get_horizon_wide_metric()
        • AutoTimeSeries.load_model()
        • AutoTimeSeries.export_apply_code()
        • AutoTimeSeries.build_report()
        • AutoTimeSeries.generate_html_report()
        • AutoTimeSeries.generate_notebook_iframe_report()
        • AutoTimeSeries.disable_hana_execution()
        • AutoTimeSeries.enable_hana_execution()
        • AutoTimeSeries.get_apl_version()
        • AutoTimeSeries.get_artifacts_recorder()
        • AutoTimeSeries.get_debrief_report()
        • AutoTimeSeries.get_fit_operation_log()
        • AutoTimeSeries.get_indicators()
        • AutoTimeSeries.get_model_info()
        • AutoTimeSeries.get_params()
        • AutoTimeSeries.get_predict_operation_log()
        • AutoTimeSeries.get_summary()
        • AutoTimeSeries.is_fitted()
        • AutoTimeSeries.save_artifact()
        • AutoTimeSeries.save_model()
        • AutoTimeSeries.schedule_fit()
        • AutoTimeSeries.schedule_predict()
        • AutoTimeSeries.set_scale_out()
    • hana_ml.algorithms.apl.classification
      • AutoClassifier
        • AutoClassifier.fit()
        • AutoClassifier.predict()
        • AutoClassifier.score()
        • AutoClassifier.disable_hana_execution()
        • AutoClassifier.enable_hana_execution()
        • AutoClassifier.export_apply_code()
        • AutoClassifier.get_apl_version()
        • AutoClassifier.get_artifacts_recorder()
        • AutoClassifier.get_debrief_report()
        • AutoClassifier.get_feature_importances()
        • AutoClassifier.get_fit_operation_log()
        • AutoClassifier.get_indicators()
        • AutoClassifier.get_model_info()
        • AutoClassifier.get_params()
        • AutoClassifier.get_performance_metrics()
        • AutoClassifier.get_predict_operation_log()
        • AutoClassifier.get_summary()
        • AutoClassifier.is_fitted()
        • AutoClassifier.load_model()
        • AutoClassifier.save_artifact()
        • AutoClassifier.save_model()
        • AutoClassifier.schedule_fit()
        • AutoClassifier.schedule_predict()
        • AutoClassifier.set_params()
        • AutoClassifier.set_scale_out()
    • hana_ml.algorithms.apl.regression
      • AutoRegressor
        • AutoRegressor.fit()
        • AutoRegressor.predict()
        • AutoRegressor.score()
        • AutoRegressor.disable_hana_execution()
        • AutoRegressor.enable_hana_execution()
        • AutoRegressor.export_apply_code()
        • AutoRegressor.get_apl_version()
        • AutoRegressor.get_artifacts_recorder()
        • AutoRegressor.get_debrief_report()
        • AutoRegressor.get_feature_importances()
        • AutoRegressor.get_fit_operation_log()
        • AutoRegressor.get_indicators()
        • AutoRegressor.get_model_info()
        • AutoRegressor.get_params()
        • AutoRegressor.get_performance_metrics()
        • AutoRegressor.get_predict_operation_log()
        • AutoRegressor.get_summary()
        • AutoRegressor.is_fitted()
        • AutoRegressor.load_model()
        • AutoRegressor.save_artifact()
        • AutoRegressor.save_model()
        • AutoRegressor.schedule_fit()
        • AutoRegressor.schedule_predict()
        • AutoRegressor.set_params()
        • AutoRegressor.set_scale_out()
    • hana_ml.algorithms.apl.clustering
      • AutoUnsupervisedClustering
        • AutoUnsupervisedClustering.fit()
        • AutoUnsupervisedClustering.fit_predict()
        • AutoUnsupervisedClustering.get_metrics()
        • AutoUnsupervisedClustering.disable_hana_execution()
        • AutoUnsupervisedClustering.enable_hana_execution()
        • AutoUnsupervisedClustering.export_apply_code()
        • AutoUnsupervisedClustering.get_apl_version()
        • AutoUnsupervisedClustering.get_artifacts_recorder()
        • AutoUnsupervisedClustering.get_debrief_report()
        • AutoUnsupervisedClustering.get_fit_operation_log()
        • AutoUnsupervisedClustering.get_indicators()
        • AutoUnsupervisedClustering.get_model_info()
        • AutoUnsupervisedClustering.get_params()
        • AutoUnsupervisedClustering.get_predict_operation_log()
        • AutoUnsupervisedClustering.get_summary()
        • AutoUnsupervisedClustering.is_fitted()
        • AutoUnsupervisedClustering.load_model()
        • AutoUnsupervisedClustering.predict()
        • AutoUnsupervisedClustering.save_artifact()
        • AutoUnsupervisedClustering.save_model()
        • AutoUnsupervisedClustering.schedule_fit()
        • AutoUnsupervisedClustering.schedule_predict()
        • AutoUnsupervisedClustering.set_params()
        • AutoUnsupervisedClustering.set_scale_out()
      • AutoSupervisedClustering
        • AutoSupervisedClustering.set_params()
        • AutoSupervisedClustering.fit()
        • AutoSupervisedClustering.predict()
        • AutoSupervisedClustering.fit_predict()
        • AutoSupervisedClustering.get_metrics()
        • AutoSupervisedClustering.load_model()
        • AutoSupervisedClustering.disable_hana_execution()
        • AutoSupervisedClustering.enable_hana_execution()
        • AutoSupervisedClustering.export_apply_code()
        • AutoSupervisedClustering.get_apl_version()
        • AutoSupervisedClustering.get_artifacts_recorder()
        • AutoSupervisedClustering.get_debrief_report()
        • AutoSupervisedClustering.get_fit_operation_log()
        • AutoSupervisedClustering.get_indicators()
        • AutoSupervisedClustering.get_model_info()
        • AutoSupervisedClustering.get_params()
        • AutoSupervisedClustering.get_predict_operation_log()
        • AutoSupervisedClustering.get_summary()
        • AutoSupervisedClustering.is_fitted()
        • AutoSupervisedClustering.save_artifact()
        • AutoSupervisedClustering.save_model()
        • AutoSupervisedClustering.schedule_fit()
        • AutoSupervisedClustering.schedule_predict()
        • AutoSupervisedClustering.set_scale_out()
  • hana_ml.algorithms.pal package
    • Algorithms
      • PAL Base
        • PALBase
      • Auto ML
        • AutomaticClassification
        • AutomaticRegression
        • AutomaticTimeSeries
        • Preprocessing
        • MassiveAutomaticClassification
        • MassiveAutomaticRegression
        • MassiveAutomaticTimeSeries
      • Unified Interface
        • UnifiedClassification
        • UnifiedRegression
        • UnifiedClustering
        • UnifiedExponentialSmoothing
      • Clustering
        • AffinityPropagation
        • AgglomerateHierarchicalClustering
        • DBSCAN
        • GeometryDBSCAN
        • KMeans
        • KMedians
        • KMedoids
        • SpectralClustering
        • KMeansOutlier
        • GaussianMixture
        • SOM
        • SlightSilhouette
        • outlier_detection_kmeans
      • Classification
        • LinearDiscriminantAnalysis
        • LogisticRegression
        • OnlineMultiLogisticRegression
        • NaiveBayes
        • KNNClassifier
        • MLPClassifier
        • SVC
        • OneClassSVM
        • DecisionTreeClassifier
        • RDTClassifier
        • HybridGradientBoostingClassifier
        • MLPMultiTaskClassifier
      • Regression
        • LinearRegression
        • OnlineLinearRegression
        • KNNRegressor
        • MLPRegressor
        • PolynomialRegression
        • GLM
        • ExponentialRegression
        • BiVariateGeometricRegression
        • BiVariateNaturalLogarithmicRegression
        • CoxProportionalHazardModel
        • SVR
        • DecisionTreeRegressor
        • RDTRegressor
        • HybridGradientBoostingRegressor
        • MLPMultiTaskRegressor
      • Preprocessing
        • FeatureNormalizer
        • FeatureSelection
        • IsolationForest
        • KBinsDiscretizer
        • Imputer
        • Discretize
        • MDS
        • SMOTE
        • SMOTETomek
        • TomekLinks
        • Sampling
        • ImputeTS
        • PowerTransform
        • QuantileTransform
        • OutlierDetectionRegression
        • PCA
        • CATPCA
        • train_test_val_split
        • variance_test
      • Time Series
        • AdditiveModelForecast
        • ARIMA
        • AutoARIMA
        • CPD
        • BCPD
        • OnlineBCPD
        • BSTS
        • TimeSeriesClassification
        • SingleExponentialSmoothing
        • DoubleExponentialSmoothing
        • TripleExponentialSmoothing
        • AutoExponentialSmoothing
        • BrownExponentialSmoothing
        • Croston
        • CrostonTSB
        • GARCH
        • Hierarchical_Forecast
        • LR_seasonal_adjust
        • LSTM
        • LTSF
        • OnlineARIMA
        • OutlierDetectionTS
        • GRUAttention
        • ROCKET
        • VectorARIMA
        • DWT
        • accuracy_measure
        • correlation
        • fft
        • dtw
        • fast_dtw
        • intermittent_forecast
        • hull_white_simulate
        • periodogram
        • permutation_importance
        • stationarity_test
        • seasonal_decompose
        • trend_test
        • wavedec
        • waverec
        • wpdec
        • wprec
        • white_noise_test
      • Statistics
        • bernoulli
        • beta
        • binomial
        • cauchy
        • chi_squared
        • exponential
        • gumbel
        • f
        • gamma
        • geometric
        • lognormal
        • negative_binomial
        • normal
        • pert
        • poisson
        • student_t
        • uniform
        • weibull
        • multinomial
        • mcmc
        • chi_squared_goodness_of_fit
        • chi_squared_independence
        • ttest_1samp
        • ttest_ind
        • ttest_paired
        • f_oneway
        • f_oneway_repeated
        • univariate_analysis
        • covariance_matrix
        • pearsonr_matrix
        • iqr
        • wilcoxon
        • median_test_1samp
        • grubbs_test
        • entropy
        • condition_index
        • cdf
        • ftest_equal_var
        • factor_analysis
        • kaplan_meier_survival_analysis
        • quantile
        • distribution_fit
        • ks_test
        • interval_quality
        • benford_analysis
        • KDE
      • Association
        • Apriori
        • AprioriLite
        • FPGrowth
        • KORD
        • SPM
      • Recommender System
        • ALS
        • FRM
        • FFMClassifier
        • FFMRegressor
        • FFMRanker
        • MLPRecommender
      • Social Network Analysis
        • LinkPrediction
        • PageRank
      • Ranking
        • SVRanking
      • Miscellaneous
        • abc_analysis
        • weighted_score_table
        • create_model_card
        • parse_model_card
        • TSNE
        • FairMLClassification
        • FairMLRegression
      • Metrics
        • accuracy_score
        • auc
        • confusion_matrix
        • multiclass_auc
        • r2_score
        • binary_classification_debriefing
      • Model and Pipeline
        • ParamSearchCV
        • GridSearchCV
        • RandomSearchCV
        • Pipeline
      • Text Processing
        • CRF
        • LatentDirichletAllocation
    • Topics
      • Model Evaluation and Parameter Selection
        • Resampling Methods
        • Search Strategies
      • Successive Halving and Hyperband for Parameter Selection
        • Key Relevant Parameters
      • Biased Linear Models
      • Model State for Real-Time Scoring
      • Local Interpretability of Models
        • SHAP
        • Surrogate
        • Direct Explanation
        • Models/Algorithms in hana_ml.algorithms.pal Package that Support Local Interpretability
        • Key Relevant Parameters in hana_ml.algorithms.pal Package
      • Explaining the Forecasts of ARIMA
      • Methods for Residual Extraction in Time-Series Outlier Detection
        • 1. Residual from Median Filter
        • 2. Residual from Seasonal Decomposition
        • 3. Residual Extraction from Median Filter and Seasonal Decomposition
        • 4. Meaningless Parameter Combination to be Avoided
      • Methods of Outlier Detection from Residual
        • 1. Z1 Score
        • 2. Z2 Score
        • 3. IQR Score
        • 4. MAD Score
        • 5. Isolation Forest Score
        • 6. DBSCAN
      • Genetic Optimization in AutoML
        • Individual Representation
        • Selection
        • Crossover
        • Mutation
        • Evolutional Iteration Step
        • Control Parameters
      • Probability Density Functions for MCMC Sampling
      • Miscellaneous Topics
        • Early Stop in HGBT
        • Feature Grouping in HGBT
        • Histogram Splitting in HGBT
        • Model Compression for Random Decision Trees
        • Model Compression for Support Vector Machine
        • Seasonalities in Additive Model Forecast
      • Precomputed Distance Matrix as Input Data in UnifiedClustering
      • Parameters for Missing Value Handling in HANA DataFrame
      • Permutation Feature Importance
      • Permutation Feature Importance for Time Series
        • Parameters
        • Returns
        • Examples
    • Parameter Mappings
  • hana_ml.visualizers package
    • hana_ml.visualizers.eda
      • quarter_plot()
      • seasonal_plot()
      • timeseries_box_plot()
      • bubble_plot()
      • parallel_coordinates()
      • plot_acf()
      • plot_pacf()
      • plot_time_series_outlier()
      • plot_change_points()
      • plot_moving_average()
      • plot_rolling_stddev()
      • plot_seasonal_decompose()
      • kdeplot()
      • hist()
      • plot_psd()
      • EDAVisualizer
        • EDAVisualizer.distribution_plot()
        • EDAVisualizer.pie_plot()
        • EDAVisualizer.correlation_plot()
        • EDAVisualizer.scatter_plot()
        • EDAVisualizer.bar_plot()
        • EDAVisualizer.box_plot()
        • EDAVisualizer.ax
        • EDAVisualizer.cmap
        • EDAVisualizer.reset()
        • EDAVisualizer.set_ax()
        • EDAVisualizer.set_cmap()
        • EDAVisualizer.set_size()
        • EDAVisualizer.size
      • Profiler
        • Profiler.description()
        • Profiler.set_size()
    • hana_ml.visualizers.metrics
      • MetricsVisualizer
        • MetricsVisualizer.plot_confusion_matrix()
        • MetricsVisualizer.ax
        • MetricsVisualizer.cmap
        • MetricsVisualizer.reset()
        • MetricsVisualizer.set_ax()
        • MetricsVisualizer.set_cmap()
        • MetricsVisualizer.set_size()
        • MetricsVisualizer.size
    • hana_ml.visualizers.m4_sampling
      • get_min_index()
      • get_max_index()
      • m4_sampling()
    • hana_ml.visualizers.model_debriefing
      • TreeModelDebriefing
        • TreeModelDebriefing.tree_debrief()
        • TreeModelDebriefing.tree_export()
        • TreeModelDebriefing.tree_parse()
        • TreeModelDebriefing.tree_debrief_with_dot()
        • TreeModelDebriefing.tree_export_with_dot()
        • TreeModelDebriefing.shapley_explainer()
    • hana_ml.visualizers.dataset_report
      • DatasetReportBuilder
        • DatasetReportBuilder.build()
        • DatasetReportBuilder.set_framework_version()
        • DatasetReportBuilder.get_report_html()
        • DatasetReportBuilder.get_iframe_report_html()
        • DatasetReportBuilder.generate_html_report()
        • DatasetReportBuilder.generate_notebook_iframe_report()
    • hana_ml.visualizers.shap
      • ShapleyExplainer
        • ShapleyExplainer.get_feature_value_and_effect()
        • ShapleyExplainer.get_force_plot_item()
        • ShapleyExplainer.get_beeswarm_plot_item()
        • ShapleyExplainer.get_bar_plot_item()
        • ShapleyExplainer.get_dependence_plot_items()
        • ShapleyExplainer.get_enhanced_dependence_plot_items()
        • ShapleyExplainer.force_plot()
        • ShapleyExplainer.summary_plot()
      • TimeSeriesExplainer
        • TimeSeriesExplainer.explain_arima_model()
        • TimeSeriesExplainer.explain_additive_model()
    • hana_ml.visualizers.unified_report
      • UnifiedReport
        • UnifiedReport.set_model_report_style()
        • UnifiedReport.build()
        • UnifiedReport.set_metric_samplings()
        • UnifiedReport.tree_debrief()
        • UnifiedReport.display()
        • UnifiedReport.get_iframe_report()
    • hana_ml.visualizers.visualizer_base
      • forecast_line_plot()
    • hana_ml.visualizers.digraph
      • Node
      • InPort
      • OutPort
      • Edge
      • DigraphConfig
        • DigraphConfig.set_text_layout()
        • DigraphConfig.set_digraph_layout()
        • DigraphConfig.set_node_sep()
        • DigraphConfig.set_rank_sep()
      • BaseDigraph
        • BaseDigraph.add_model_node()
        • BaseDigraph.add_python_node()
        • BaseDigraph.add_edge()
      • Digraph
        • Digraph.to_json()
        • Digraph.build()
        • Digraph.generate_html()
        • Digraph.generate_notebook_iframe()
        • Digraph.add_edge()
        • Digraph.add_model_node()
        • Digraph.add_python_node()
      • MultiDigraph
        • MultiDigraph.ChildDigraph
        • MultiDigraph.add_child_digraph()
        • MultiDigraph.to_json()
        • MultiDigraph.build()
        • MultiDigraph.generate_html()
        • MultiDigraph.generate_notebook_iframe()
    • hana_ml.visualizers.word_cloud
      • WordCloud
        • WordCloud.build()
        • WordCloud.fit_words()
        • WordCloud.generate()
        • WordCloud.generate_from_frequencies()
        • WordCloud.generate_from_text()
        • WordCloud.process_text()
        • WordCloud.recolor()
        • WordCloud.to_array()
        • WordCloud.to_file()
        • WordCloud.to_svg()
    • hana_ml.visualizers.automl_progress
      • PipelineProgressStatusMonitor
        • PipelineProgressStatusMonitor.start()
      • SimplePipelineProgressStatusMonitor
        • SimplePipelineProgressStatusMonitor.start()
    • hana_ml.visualizers.automl_report
      • BestPipelineReport
        • BestPipelineReport.generate_notebook_iframe()
        • BestPipelineReport.generate_html()
    • hana_ml.visualizers.time_series_report
      • TimeSeriesReport
        • TimeSeriesReport.addPage()
        • TimeSeriesReport.addPages()
        • TimeSeriesReport.build()
        • TimeSeriesReport.generate_html()
        • TimeSeriesReport.generate_notebook_iframe()
        • TimeSeriesReport.to_json()
      • DatasetAnalysis
        • DatasetAnalysis.pacf_item()
        • DatasetAnalysis.moving_average_item()
        • DatasetAnalysis.rolling_stddev_item()
        • DatasetAnalysis.seasonal_item()
        • DatasetAnalysis.timeseries_box_item()
        • DatasetAnalysis.seasonal_decompose_items()
        • DatasetAnalysis.quarter_item()
        • DatasetAnalysis.outlier_item()
        • DatasetAnalysis.stationarity_item()
        • DatasetAnalysis.real_item()
        • DatasetAnalysis.change_points_item()
    • hana_ml.visualizers.automl_config
      • AutoMLConfig
        • AutoMLConfig.get_config_dict()
        • AutoMLConfig.generate_html()
  • hana_ml.ml_exceptions
    • Error
    • FitIncompleteError
    • BadSQLError
    • PALUnusableError
    • ModelExistingError
  • hana_ml.model_storage
    • ModelStorageError
    • ModelStorage
      • ModelStorage.export_model()
      • ModelStorage.load_model_from_files()
      • ModelStorage.import_model()
      • ModelStorage.list_models()
      • ModelStorage.model_already_exists()
      • ModelStorage.change_storage_type()
      • ModelStorage.save_model()
      • ModelStorage.save_model_to_files()
      • ModelStorage.delete_model()
      • ModelStorage.delete_models()
      • ModelStorage.load_mlflow_model()
      • ModelStorage.clean_up()
      • ModelStorage.load_model()
      • ModelStorage.get_model_card()
      • ModelStorage.display_model_report()
      • ModelStorage.enable_persistent_memory()
      • ModelStorage.disable_persistent_memory()
      • ModelStorage.load_into_memory()
      • ModelStorage.unload_from_memory()
      • ModelStorage.set_data_lake_container()
      • ModelStorage.set_schedule()
      • ModelStorage.display_hana_schedule()
      • ModelStorage.start_schedule()
      • ModelStorage.terminate_schedule()
      • ModelStorage.set_logfile()
      • ModelStorage.upgrade_meta()
  • hana_ml.artifacts package
    • AMDP Examples
    • hana_ml.artifacts.deployers.amdp
      • gen_pass_key()
      • AMDPDeployer
        • AMDPDeployer.deploy()
        • AMDPDeployer.deploy_class()
        • AMDPDeployer.register_islm()
        • AMDPDeployer.get_is_information_from_islm()
        • AMDPDeployer.format()
    • hana_ml.artifacts.generators.abap
      • AMDPGenerator
        • AMDPGenerator.generate()
    • hana_ml.artifacts.generators.hana
      • HANAGeneratorForCAP
        • HANAGeneratorForCAP.materialize_ds_data()
        • HANAGeneratorForCAP.generate_artifacts()
      • HanaGenerator
        • HanaGenerator.generate_artifacts()
  • hana_ml.docstore package
    • create_collection_from_elements()
  • hana_ml.spatial package
    • create_predefined_srs()
    • is_srs_created()
    • get_created_srses()
  • hana_ml.graph package
    • Graph
      • Graph.describe()
      • Graph.degree_distribution()
      • Graph.drop()
      • Graph.has_vertices()
      • Graph.vertices()
      • Graph.edges()
      • Graph.in_edges()
      • Graph.out_edges()
      • Graph.source()
      • Graph.target()
      • Graph.subgraph()
    • create_graph_from_dataframes()
    • create_graph_from_edges_dataframe()
    • create_graph_from_hana_dataframes()
    • discover_graph_workspace()
    • discover_graph_workspaces()
  • hana_ml.graph.algorithms package
    • ShortestPath
      • ShortestPath.execute()
      • ShortestPath.vertices
      • ShortestPath.edges
      • ShortestPath.weight
    • Neighbors
      • Neighbors.execute()
      • Neighbors.vertices
    • NeighborsSubgraph
      • NeighborsSubgraph.execute()
      • NeighborsSubgraph.vertices
      • NeighborsSubgraph.edges
    • KShortestPaths
      • KShortestPaths.execute()
      • KShortestPaths.paths
    • TopologicalSort
      • TopologicalSort.execute()
      • TopologicalSort.vertices
      • TopologicalSort.is_sortable
    • ShortestPathsOneToAll
      • ShortestPathsOneToAll.execute()
      • ShortestPathsOneToAll.vertices
      • ShortestPathsOneToAll.edges
    • StronglyConnectedComponents
      • StronglyConnectedComponents.execute()
      • StronglyConnectedComponents.vertices
      • StronglyConnectedComponents.components
      • StronglyConnectedComponents.components_count
    • WeaklyConnectedComponents
      • WeaklyConnectedComponents.execute()
      • WeaklyConnectedComponents.vertices
      • WeaklyConnectedComponents.components
      • WeaklyConnectedComponents.components_count
    • AlgorithmBase
      • AlgorithmBase.signature_from_cols()
      • AlgorithmBase.projection_expr_from_cols()
      • AlgorithmBase._default_vertex_cols()
      • AlgorithmBase._default_edge_cols()
      • AlgorithmBase._default_vertex_select()
      • AlgorithmBase._default_edge_select()
      • AlgorithmBase._process_parameters()
      • AlgorithmBase._validate_parameters()
      • AlgorithmBase.execute()
  • hana_ml.text.tm package
    • hana_ml.text.tm
      • tf_analysis
        • tf_analysis()
      • text_classification
        • text_classification()
      • get_related_doc
        • get_related_doc()
      • get_related_term
        • get_related_term()
      • get_relevant_doc
        • get_relevant_doc()
      • get_relevant_term
        • get_relevant_term()
      • get_suggested_term
        • get_suggested_term()
      • TFIDF
        • TFIDF
        • Inherited Methods from PALBase
      • TextClassificationWithModel
        • TextClassificationWithModel
        • Inherited Methods from PALBase
  • hana_ml.hana_scheduler
    • HANAScheduler
      • HANAScheduler.check_scheduler_job_exist()
      • HANAScheduler.list_schedules()
      • HANAScheduler.get_job_names()
      • HANAScheduler.display_schedule_status()
      • HANAScheduler.set_schedule()
      • HANAScheduler.delete_schedule()
      • HANAScheduler.delete_schedules()
      • HANAScheduler.clean_up_schedules()
      • HANAScheduler.create_training_schedule()
      • HANAScheduler.create_applying_schedule()
      • HANAScheduler.create_scoring_schedule()
  • FAQs
hana-ml
  • hana_ml.text.tm package
  • get_related_doc
  • View page source
Previous Next

get_related_doc

hana_ml.text.tm.get_related_doc(pred_data, ref_data=None, top=None, threshold=None, lang='EN', index_name=None, thread_ratio=None, created_index=None, key=None)

This function returns the top-ranked related documents for a query document or for multiple documents, based on the Term Frequency - Inverse Document Frequency (TF-IDF) result or reference data.

Parameters:
pred_dataDataFrame

Accepts input data in two different data structures:

Single-row mode:

  • 1st column, Document content.

Note

Note that this mode can only process one document content at a time. Therefore, the input table must have exactly one row and one column.

Massive mode supports multiple rows:

  • 1st column, ID.

  • 2nd column, Document content.

Note

Note that this mode is only valid on an SAP HANA Cloud instance.

keystr, optional

Specifies the ID column. Only valid when pred_data contains multiple rows.

Defaults to the first column of pred_data.

ref_dataDataFrame or a tuple of DataFrame

Specify the reference data.

If ref_data is a DataFrame, then it should be structured as follows:

  • 1st column, ID.

  • 2nd column, Document content.

  • 3rd column, Document category.

If ref_data is a tuple of DataFrames, then it should correspond to the reference TF-IDF data, with each DataFrame structured as follows:

  • 1st DataFrame, TF-IDF Result.

    • 1st column, TM_TERM.

    • 2nd column, TF_VALUE.

    • 3rd column, IDF_VALUE.

  • 2nd DataFrame, Doc Term Freq Table

    • 1st column, ID.

    • 2nd column, TM_TERM.

    • 3rd column, TM_TERM_FREQUENCY.

  • 3rd DataFrame, Doc Category Table

    • 1st column, ID.

    • 2nd column, Document category.

topint, optional

Only show top N results. If 0, it shows all.

Defaults to 0.

thresholdfloat, optional

Only results whose score is greater than this value are put into the result table.

Defaults to 0.0.

langstr, optional

Specify the language type. HANA cloud instance currently supports 'EN', 'DE', 'ES', 'FR', 'RU', 'PT'. If None, auto detection will be applied.

Defaults to 'EN'.

index_namestr, optional

Specify the index name. This parameter applies only to an on-premise HANA instance.

If None, it will be generated.

thread_ratiofloat, optional

Specify the ratio of total number of threads that can be used by this function. The range of this parameter is from 0 to 1, where 0 means only using 1 thread, and 1 means using at most all the currently available threads. Values outside this range are ignored and this function heuristically determines the number of threads to use.

Only valid for a HANA cloud instance.

Defaults to 0.0.

created_index{"index": xxx, "schema": xxx, "table": xxx}, optional

Use an already created index on the given table. This parameter applies only to an on-premise HANA instance.

Returns:
DataFrame

Examples

Assuming 'ref_df' is an existing DataFrame that contains document IDs, content, and categories. Below are examples of invoking the 'get_related_doc' function.

>>> ref_df.collect()
      ID                                                  CONTENT       CATEGORY
0   doc1                      term1 term2 term2 term3 term3 term3     CATEGORY_1
1   doc2                      term2 term3 term3 term4 term4 term4     CATEGORY_1
2   doc3                      term3 term4 term4 term5 term5 term5     CATEGORY_2
3   doc4    term3 term4 term4 term5 term5 term5 term5 term5 term5     CATEGORY_2
4   doc5                                              term4 term6     CATEGORY_3
5   doc6                                  term4 term6 term6 term6     CATEGORY_3

For SAP HANA cloud:

  1. Invoking the function on a SAP HANA cloud instance using a single-row input DataFrame 'pred_df':

>>> pred_df.collect()
                   CONTENT
0  term2 term2 term3 term3
>>> get_related_doc(pred_data=pred_df, ref_data=tfidf).collect()
       ID       SCORE
0    doc2    0.891550
1    doc1    0.804670
2    doc3    0.042024
3    doc4    0.021225

tfidf is a DataFrame returned by the tf_analysis function; please refer to the examples section of tf_analysis for its content.

  2. Invoking the function on a SAP HANA cloud instance using a massive input DataFrame 'pred_df_massive', which contains multiple rows of data:

>>> pred_df_massive.collect()
   ID                          CONTENT
0   1          term2 term2 term3 term3
1   5    term3 term5 term5 term5 term6
>>> get_related_doc(pred_data=pred_df_massive, ref_data=ref_df).collect()
   PREDICT_ID  K DOC_ID     SCORE
0           1  0   doc2  0.891550
1           1  1   doc1  0.804670
2           1  2   doc3  0.042024
3           1  3   doc4  0.021225
4           5  0   doc4  0.946186
5           5  1   doc3  0.943719
6           5  2   doc6  0.313616
7           5  3   doc5  0.309858
8           5  4   doc2  0.063908
9           5  5   doc1  0.045706

For SAP HANA on-premise:

Invoking the function on a SAP HANA on-premise instance (only supports single-row mode):

>>> res = get_related_doc(pred_data=pred_df, ref_data=ref_df)
>>> res.collect()
   ID    RANK   TOTAL_TERM_COUNT  TERM_COUNT  CORRELATIONS  FACTORS  ROTATED_FACTORS  CLUSTER_LEVEL  CLUSTER_LEFT
0  doc2     1                  6           3          None     None             None           None          None
1  doc1     2                  6           3          None     None             None           None          None
2  doc3     3                  6           3          None     None             None           None          None
3  doc4     4                  9           3          None     None             None           None          None
... CLUSTER_RIGHT  HIGHLIGHTED_DOCUMENT  HIGHLIGHTED_TERMTYPES                                   SCORE
...          None                  None                   None    0.8915504731053067732915451415465213
...          None                  None                   None    0.8046698732333942283290184604993556
...          None                  None                   None   0.04202449735779462125506711345224176
...          None                  None                   None   0.02122540837399113089478674964993843

© Copyright 2024, SAP.

Built with Sphinx using a theme provided by Read the Docs.
  • Copyright
  • Disclaimer
  • Privacy Statement
  • Legal Disclosure
  • Trademark
  • Terms of Use