Byzer Python error: RuntimeError: ('Exception thrown when converting pandas.Series (object) to Arrow Array (None). ...')
【代码】Byzer Python error: RuntimeError: ('Exception thrown when converting pandas.Series (object) to Arrow Array (None). ...')。
·
错误信息:
RuntimeError: ('Exception thrown when converting pandas.Series (object) to Arrow Array (None). It can be caused by overflows or other unsafe conversions warned by Arrow. Arrow safe type check can be disabled by using SQL config `spark.sql.execution.pandas.arrowSafeTypeConversion`.', ArrowInvalid('Could not convert DenseVector([1.6486, -4.0133, -1.0091]) with type DenseVector: did not recognize Python value type when inferring an Arrow data type'))
#%python
#%input=command
#%output=python_output_table
#%schema=st(field(_id,string),field(x,array(double)))
#%runIn=driver
#%dataMode=model
#%cache=false
#%env=source /home/byzerllm/miniconda3/bin/activate byzerllm-dev
#%pythonExec=/home/byzerllm/miniconda3/envs/byzerllm-dev/bin/python
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
# Obtain the Spark session through the builder so an already-active session in
# this Python process is reused. Constructing SparkContext('local') directly
# raises ValueError when a context is already running (presumably possible if
# this script is executed more than once in the same interpreter -- confirm
# against how the Byzer runtime launches the script).
# The 'local' master only takes effect when a new context has to be created.
spark = SparkSession.builder.master('local').getOrCreate()
# Keep `sc` available under its original name.
sc = spark.sparkContext
from pyspark.ml.feature import PCA
from pyspark.ml.linalg import Vectors
# Sample input: three 5-dimensional feature vectors (one sparse, two dense),
# each wrapped in a 1-tuple so that it forms a single-column row.
data = [
    (vector,)
    for vector in (
        Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),
        Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),
        Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),
    )
]
df = spark.createDataFrame(data, schema=["features"])

# Fit a PCA model with k=3 on the "features" column; the transformed vectors
# are written to "pcaFeatures".
pca = PCA(k=3, inputCol="features", outputCol="pcaFeatures")
model = pca.fit(df)

# Apply the model, keep only the output column, and collect it into a list.
projected = model.transform(df).select("pcaFeatures")
rows = projected.collect()
for row in rows:
    print(row)
# Running counter used to assign sequential "_id" values; the first record
# gets "1".
id_count = 1


def handle_record(row):
    """Turn one collected row into an output record.

    Args:
        row: An indexable row whose first element is a vector exposing
            ``toArray()`` (e.g. the pyspark.ml vector in "pcaFeatures").

    Returns:
        A dict with "_id" (the current counter value as a string) and "x"
        (the vector's components as a plain list of Python floats).

    Side effects:
        Increments the module-level ``id_count`` and prints the raw vector.
    """
    global id_count
    item = {"_id": str(id_count)}
    id_count += 1
    # Arrow cannot infer a data type for a pyspark.ml vector object ("did not
    # recognize Python value type"), which made building the output table
    # fail. Convert the vector to a list of built-in floats so it matches the
    # declared array(double) column.
    item["x"] = [float(value) for value in row[0].toArray()]
    # Wrap the value in a 1-tuple so %-formatting stays correct even if the
    # value is itself a tuple.
    print("row0: %s" % (row[0],))
    return item
# Convert every collected row into an output record, in order, and hand the
# resulting list to the `context` object's build_result.
result = list(map(handle_record, rows))
context.build_result(result)
更多推荐
已为社区贡献5条内容
所有评论(0)