Data: +-------------------+--------------------+ | label| features| +-------------------+--------------------+ |0.13271745268556925|[-0.2006809895664...| |0.23956421080605234|[-0.0938342314459...| |0.47464690691431843|[0.14124846466227...| | 0.0941426858669834|[-0.2392557563850...| |0.18127172833957172|[-0.1521267139124...| | 0.4279981695794981|[0.09459972732745...| |0.04648603521554342|[-0.2869124070364...| | 0.4164836719056925|[0.08308522965365...| |0.15519130823516833|[-0.1782071340168...| |0.34583751349139175|[0.01243907123934...| | 0.5732358988284585|[0.2398374565764162]| |0.12352025893247957|[-0.2098781833195...| | 0.672220700788423|[0.3388222585363807]| |0.11796247818430779|[-0.2154359640677...| |0.32647852580932724|[-0.0069199164427...| |0.09211654339348248|[-0.2412818988585...| | 0.4907542977669017|[0.15735585551485...| | 0.3255888257160203|[-0.0078096165360...| | 0.8542890157811815|[0.5208905735291393]| | 0.1132558594215048|[-0.2201425828305...| +-------------------+--------------------+ only showing top 20 rows
val model = lr.fit(data) val predict_data = model.transform(data) +--------------------+-------------------+-------------------+ | features| label| predicted_label| +--------------------+-------------------+-------------------+ |[-0.2006809895664...|0.13271745268556925|0.13271745268556925| |[-0.0938342314459...|0.23956421080605234|0.23956421080605234| |[0.14124846466227...|0.47464690691431843|0.47464690691431843| |[-0.2392557563850...| 0.0941426858669834| 0.0941426858669834| |[-0.1521267139124...|0.18127172833957172|0.18127172833957172| |[0.09459972732745...| 0.4279981695794981| 0.4279981695794981| |[-0.2869124070364...|0.04648603521554342| 0.0464860352155434| |[0.08308522965365...| 0.4164836719056925| 0.4164836719056925| |[-0.1782071340168...|0.15519130823516833|0.15519130823516833| |[0.01243907123934...|0.34583751349139175|0.34583751349139175| |[0.2398374565764162]| 0.5732358988284585| 0.5732358988284585| |[-0.2098781833195...|0.12352025893247957|0.12352025893247959| |[0.3388222585363807]| 0.672220700788423| 0.672220700788423| |[-0.2154359640677...|0.11796247818430779|0.11796247818430777| |[-0.0069199164427...|0.32647852580932724|0.32647852580932724| |[-0.2412818988585...|0.09211654339348248|0.09211654339348246| |[0.15735585551485...| 0.4907542977669017| 0.4907542977669017| |[-0.0078096165360...| 0.3255888257160203| 0.3255888257160203| |[0.5208905735291393]| 0.8542890157811815| 0.8542890157811815| |[-0.2201425828305...| 0.1132558594215048|0.11325585942150479| +--------------------+-------------------+-------------------+ only showing top 20 rows model.weights res49: org.apache.spark.mllib.linalg.Vector = [1.0] if instead, I remove the intercept: val zz = lrr.setFitIntercept(false).fit(vnt_data) zz.transform(vnt_data).select(scnd_feat_col, scnd_lab_col, scnd_pred_col).show +--------------------+-------------------+--------------------+ | features| label| predicted_label| +--------------------+-------------------+--------------------+ 
|[-0.2006809895664...|0.13271745268556925|-0.20472873432747501| |[-0.0938342314459...|0.23956421080605234|-0.09572687219665929| |[0.14124846466227...|0.47464690691431843| 0.1440974526709132| |[-0.2392557563850...| 0.0941426858669834|-0.24408155596148765| |[-0.1521267139124...|0.18127172833957172|-0.15519511670726388| |[0.09459972732745...| 0.4279981695794981| 0.09650780816515314| |[-0.2869124070364...|0.04648603521554342|-0.29269944344167753| |[0.08308522965365...| 0.4164836719056925| 0.0847610625453144| |[-0.1782071340168...|0.15519130823516833| -0.1818015800809893| |[0.01243907123934...|0.34583751349139175|0.012689967876592361| |[0.2398374565764162]| 0.5732358988284585| 0.24467498907237623| |[-0.2098781833195...|0.12352025893247957|-0.21411143590026604| |[0.3388222585363807]| 0.672220700788423| 0.3456563190264363| |[-0.2154359640677...|0.11796247818430779| -0.2197813173409589| |[-0.0069199164427...|0.32647852580932724|-0.00705949147465...| |[-0.2412818988585...|0.09211654339348248|-0.24614856582157998| |[0.15735585551485...| 0.4907542977669017| 0.16052973033553486| |[-0.0078096165360...| 0.3255888257160203|-0.00796713685963...| |[0.5208905735291393]| 0.8542890157811815| 0.5313969602806332| |[-0.2201425828305...| 0.1132558594215048|-0.22458286882001133| +--------------------+-------------------+--------------------+ only showing top 20 rows makes much more sense. Thanks for the help, Saif