Commit 93e42ff4 authored by Federico Mestrone's avatar Federico Mestrone
Browse files

Added file to run pipeline on Dataflow with maven

parent 62482acb
#!/usr/bin/env bash
mvn compile exec:java -e \
-Dexec.mainClass=gcp.cm.bigdata.adtech.dataflow.CleaningPipeline \
-Dexec.args="--project=qwiklabs-gcp-56c3e61809c73e4d \
--stagingLocation=gs://abucket-for-codemotion/dataflow/staging/ \
--tempLocation=gs://abucket-for-codemotion/dataflow/temp/ \
--runner=DataflowRunner"
......@@ -16,8 +16,8 @@ public class CleaningPipeline {
Pipeline p = Pipeline.create(options);
p
.apply("ReadFromGCS",
// TextIO.read().from("gs://abucket-for-codemotion/adtech/test"))
TextIO.read().from("/Users/federico/Downloads/avazu-ctr-prediction/test"))
TextIO.read().from("gs://abucket-for-codemotion/adtech/test"))
// TextIO.read().from("/Users/federico/Downloads/avazu-ctr-prediction/test"))
.apply("SplitLines", MapElements
.into(TypeDescriptors.lists(TypeDescriptors.strings()))
.via((String line) -> Arrays.asList(line.split(",")))
......@@ -35,8 +35,8 @@ public class CleaningPipeline {
.via((List<String> fields) -> String.join(",", fields))
)
.apply("WriteToGCS",
// TextIO.write().withoutSharding().to("gs://abucket-for-codemotion/adtech/test-df-out.csv")
TextIO.write().withoutSharding().to("/Users/federico/Downloads/avazu-ctr-prediction/test-df-out.csv")
TextIO.write().withoutSharding().to("gs://abucket-for-codemotion/adtech/test-df-out.csv")
// TextIO.write().withoutSharding().to("/Users/federico/Downloads/avazu-ctr-prediction/test-df-out.csv")
)
;
p.run().waitUntilFinish();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment