Skip to content

Commit 7a364f1

Browse files
committed
Adding data pipeline sample code
1 parent 39efd8f commit 7a364f1

9 files changed

+119586
-0
lines changed

Diff for: azure-data-pipeline/data/sample.csv

+119,041
Large diffs are not rendered by default.

Diff for: azure-data-pipeline/data_pipeline_ci_cd.yml

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# CI/CD pipeline for the data pipeline sample.
# CI publishes two artifacts (Databricks notebooks and the ADF ARM templates);
# CD deploys them to Databricks and Azure Data Factory in the "qa" environment.
name: CICD

# Pull-request trigger: PRs that target these branches.
pr:
  branches:
    include:
      - master
      - adf_publish

# CI trigger: commits to master that touch files under scripts/.
trigger:
  branches:
    include:
      - master
  paths:
    include:
      - scripts/

# Variable groups. The non-predefined $(...) macros used below
# (e.g. DATA_FACTORY_DEV_NAME, DATABRICKS_URL, databricks-token) presumably
# resolve from these groups -- confirm in the pipeline Library.
variables:
  - group: datapipeline-vg
  - group: keys-vg

pool:
  vmImage: ubuntu-latest

stages:
  - stage: 'CI'
    displayName: 'CI'
    jobs:
      - job: "CI_Job"
        displayName: "CI Job"
        # The CI stage produces two artifacts: "notebooks" and "adf-pipelines".
        # The Azure Resource Manager templates for the ADF pipelines are stored
        # in the technical branch "adf_publish".
        steps:
          - checkout: self
          # List the checked-out sources (diagnostic output only).
          - script: dir $(Build.SourcesDirectory)/$(Build.Repository.Name)
          - publish: $(Build.SourcesDirectory)/$(Build.Repository.Name)/notebooks
            artifact: notebooks
          # NOTE(review): the documented inline form is
          # git://<project>/<repo>@<ref>; only the project name is given here.
          # Confirm the repository resolves as intended.
          - checkout: git://${{variables['System.TeamProject']}}@adf_publish
          - script: dir $(Build.SourcesDirectory)/$(Build.Repository.Name)
          - publish: $(Build.SourcesDirectory)/$(Build.Repository.Name)/$(DATA_FACTORY_DEV_NAME)
            artifact: adf-pipelines

  - stage: 'CD'
    displayName: 'CD'
    jobs:
      - deployment: "Deploy_to_Databricks"
        displayName: 'Deploy to Databricks'
        # 0 requests the maximum run time the agent pool permits instead of
        # the default job timeout.
        timeoutInMinutes: 0
        environment: qa
        strategy:
          runOnce:
            deploy:
              steps:
                - task: UsePythonVersion@0
                  inputs:
                    versionSpec: '3.x'
                    addToPath: true
                    architecture: 'x64'
                  displayName: 'Use Python3'

                # The two tasks below require the "DevOps for Azure Databricks"
                # extension to be installed.
                - task: configuredatabricks@0
                  inputs:
                    url: '$(DATABRICKS_URL)'
                    token: '$(databricks-token)'
                  displayName: 'Configure Databricks CLI'

                # Reads the "notebooks" artifact published by the CI stage.
                - task: deploynotebooks@0
                  inputs:
                    notebooksFolderPath: '$(Pipeline.Workspace)/notebooks'
                    workspaceFolder: '/Shared'
                  displayName: 'Deploy (copy) data processing notebook to the Databricks cluster'

      - deployment: "Deploy_to_ADF"
        displayName: 'Deploy to ADF'
        timeoutInMinutes: 0
        environment: qa
        strategy:
          runOnce:
            deploy:
              steps:
                # Deploys the ARM templates from the "adf-pipelines" artifact,
                # overriding the factory name and storage variables.
                - task: AzureResourceGroupDeployment@2
                  displayName: 'Deploy ADF resources'
                  inputs:
                    azureSubscription: $(AZURE_RM_CONNECTION)
                    resourceGroupName: $(RESOURCE_GROUP)
                    location: $(LOCATION)
                    csmFile: '$(Pipeline.Workspace)/adf-pipelines/ARMTemplateForFactory.json'
                    csmParametersFile: '$(Pipeline.Workspace)/adf-pipelines/ARMTemplateParametersForFactory.json'
                    # Folded scalar: the lines are joined with single spaces
                    # into one argument string.
                    overrideParameters: >-
                      -factoryName "$(DATA_FACTORY_TEST_NAME)"
                      -DataPipeline_properties_variables_storage_account_name_defaultValue "$(STORAGE_ACCOUNT_NAME)"
                      -DataPipeline_properties_variables_storage_container_name_defaultValue "$(STORAGE_CONTAINER_NAME)"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"Microsoft.DataFactory/factories/pipelines": {
3+
"properties": {
4+
"variables": {
5+
"*": {
6+
"defaultValue": "="
7+
}
8+
}
9+
}
10+
}
11+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"name": "PreparedDataset",
3+
"properties": {
4+
"linkedServiceName": {
5+
"referenceName": "AzureBlobStorage1",
6+
"type": "LinkedServiceReference"
7+
},
8+
"annotations": [],
9+
"type": "DelimitedText",
10+
"typeProperties": {
11+
"location": {
12+
"type": "AzureBlobStorageLocation",
13+
"container": "prepareddata"
14+
},
15+
"columnDelimiter": ",",
16+
"escapeChar": "\\",
17+
"quoteChar": "\""
18+
},
19+
"schema": []
20+
},
21+
"type": "Microsoft.DataFactory/factories/datasets"
22+
}
+218
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
{
2+
"name": "RawDataset",
3+
"properties": {
4+
"linkedServiceName": {
5+
"referenceName": "AzureBlobStorage1",
6+
"type": "LinkedServiceReference"
7+
},
8+
"annotations": [],
9+
"type": "DelimitedText",
10+
"typeProperties": {
11+
"location": {
12+
"type": "AzureBlobStorageLocation",
13+
"container": "rawdata"
14+
},
15+
"columnDelimiter": ",",
16+
"escapeChar": "\\",
17+
"quoteChar": "\""
18+
},
19+
"schema": [
20+
{
21+
"type": "String"
22+
},
23+
{
24+
"type": "String"
25+
},
26+
{
27+
"type": "String"
28+
},
29+
{
30+
"type": "String"
31+
},
32+
{
33+
"type": "String"
34+
},
35+
{
36+
"type": "String"
37+
},
38+
{
39+
"type": "String"
40+
},
41+
{
42+
"type": "String"
43+
},
44+
{
45+
"type": "String"
46+
},
47+
{
48+
"type": "String"
49+
},
50+
{
51+
"type": "String"
52+
},
53+
{
54+
"type": "String"
55+
},
56+
{
57+
"type": "String"
58+
},
59+
{
60+
"type": "String"
61+
},
62+
{
63+
"type": "String"
64+
},
65+
{
66+
"type": "String"
67+
},
68+
{
69+
"type": "String"
70+
},
71+
{
72+
"type": "String"
73+
},
74+
{
75+
"type": "String"
76+
},
77+
{
78+
"type": "String"
79+
},
80+
{
81+
"type": "String"
82+
},
83+
{
84+
"type": "String"
85+
},
86+
{
87+
"type": "String"
88+
},
89+
{
90+
"type": "String"
91+
},
92+
{
93+
"type": "String"
94+
},
95+
{
96+
"type": "String"
97+
},
98+
{
99+
"type": "String"
100+
},
101+
{
102+
"type": "String"
103+
},
104+
{
105+
"type": "String"
106+
},
107+
{
108+
"type": "String"
109+
},
110+
{
111+
"type": "String"
112+
},
113+
{
114+
"type": "String"
115+
},
116+
{
117+
"type": "String"
118+
},
119+
{
120+
"type": "String"
121+
},
122+
{
123+
"type": "String"
124+
},
125+
{
126+
"type": "String"
127+
},
128+
{
129+
"type": "String"
130+
},
131+
{
132+
"type": "String"
133+
},
134+
{
135+
"type": "String"
136+
},
137+
{
138+
"type": "String"
139+
},
140+
{
141+
"type": "String"
142+
},
143+
{
144+
"type": "String"
145+
},
146+
{
147+
"type": "String"
148+
},
149+
{
150+
"type": "String"
151+
},
152+
{
153+
"type": "String"
154+
},
155+
{
156+
"type": "String"
157+
},
158+
{
159+
"type": "String"
160+
},
161+
{
162+
"type": "String"
163+
},
164+
{
165+
"type": "String"
166+
},
167+
{
168+
"type": "String"
169+
},
170+
{
171+
"type": "String"
172+
},
173+
{
174+
"type": "String"
175+
},
176+
{
177+
"type": "String"
178+
},
179+
{
180+
"type": "String"
181+
},
182+
{
183+
"type": "String"
184+
},
185+
{
186+
"type": "String"
187+
},
188+
{
189+
"type": "String"
190+
},
191+
{
192+
"type": "String"
193+
},
194+
{
195+
"type": "String"
196+
},
197+
{
198+
"type": "String"
199+
},
200+
{
201+
"type": "String"
202+
},
203+
{
204+
"type": "String"
205+
},
206+
{
207+
"type": "String"
208+
},
209+
{
210+
"type": "String"
211+
},
212+
{
213+
"type": "String"
214+
}
215+
]
216+
},
217+
"type": "Microsoft.DataFactory/factories/datasets"
218+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"name": "AzureBlobStorage1",
3+
"type": "Microsoft.DataFactory/factories/linkedservices",
4+
"properties": {
5+
"annotations": [],
6+
"type": "AzureBlobStorage",
7+
"typeProperties": {
8+
"connectionString": {
9+
"type": "AzureKeyVaultSecret",
10+
"store": {
11+
"referenceName": "AzureKeyVault1",
12+
"type": "LinkedServiceReference"
13+
},
14+
"secretName": "StorageConnectString"
15+
}
16+
}
17+
}
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"name": "AzureDatabricks1",
3+
"properties": {
4+
"annotations": [],
5+
"type": "AzureDatabricks",
6+
"typeProperties": {
7+
"domain": "https://adb-1003029183931884.4.azuredatabricks.net",
8+
"accessToken": {
9+
"type": "AzureKeyVaultSecret",
10+
"store": {
11+
"referenceName": "AzureKeyVault1",
12+
"type": "LinkedServiceReference"
13+
},
14+
"secretName": "databricks-token"
15+
},
16+
"existingClusterId": "0515-102917-bonus843"
17+
}
18+
},
19+
"type": "Microsoft.DataFactory/factories/linkedservices"
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"name": "AzureKeyVault1",
3+
"type": "Microsoft.DataFactory/factories/linkedservices",
4+
"properties": {
5+
"annotations": [],
6+
"type": "AzureKeyVault",
7+
"typeProperties": {
8+
"baseUrl": "https://yourbase.vault.azure.net/"
9+
}
10+
}
11+
}

0 commit comments

Comments
 (0)