---
# Role tasks: list the metric definitions that the OCI Monitoring service
# holds for one compute instance, then print the name of each metric.
- name: Collect metrics
  oracle.oci.oci_monitoring_metric_actions:
    compartment_id: "your-compartment-ocid"
    action: list
    namespace: oci_computeagent
    dimension_filters:
      resourceID: "your-compute-instance-ocid"
  register: result

- name: Set list of indexes (number of metrics)
  ansible.builtin.set_fact:
    # range() produces a lazy range object; "| list" turns it into a real
    # list so that the fact can be handed to "loop" below.
    l_index: "{{ range(result.metric | length) | list }}"

- name: Show metrics
  ansible.builtin.debug:
    msg:
      # Address the metric list by its key instead of by its position in the
      # registered result, which depends on the order of the result's keys.
      - "{{ result.metric[index]['name'] }}"
  loop: "{{ l_index }}"  # loop through list indexes
  loop_control:
    index_var: index
...
# Role tests main.yml is like:
---
# Test play: apply the oci-metrics role on the local machine,
# without opening a remote connection.
- name: Test role
  hosts: localhost
  connection: local
  roles:
    - role: ../../oci-metrics
...
# Run role: ansible-playbook -i inventory test.yml
# Expect ten metrics in result:
ok: [localhost] => (item=0) => "CpuUtilization"
ok: [localhost] => (item=1) => "DiskBytesRead"
ok: [localhost] => (item=2) => "DiskBytesWritten"
ok: [localhost] => (item=3) => "DiskIopsRead"
ok: [localhost] => (item=4) => "DiskIopsWritten"
ok: [localhost] => (item=5) => "LoadAverage"
ok: [localhost] => (item=6) => "MemoryAllocationStalls"
ok: [localhost] => (item=7) => "MemoryUtilization"
ok: [localhost] => (item=8) => "NetworksBytesIn"
ok: [localhost] => (item=9) => "NetworksBytesOut"
|
#!/bin/python3
# Setup section of the disk_usage custom-metric script: parses the command
# line, defines the metric identifiers and loads the OCI SDK configuration.
# NOTE(review): psutil and datetime are not used in this section; presumably
# they are needed further down in the script - confirm.
import socket
import argparse
import oci
import psutil
from datetime import datetime
# Command-line arguments (both mandatory):
#   --location   on-prem or OCI region
#   --partition  partition to report on (e.g. /, /boot)
parser = argparse.ArgumentParser(description=f"OCI metric disk_usage for {socket.gethostname()}")
parser.add_argument("--location", help="Location", required=True)
parser.add_argument("--partition", help="Partition", required=True)
args = parser.parse_args()
location = args.location
partition = args.partition
# Placeholder: replace with the OCID of your compartment.
comp_ocid = "...your-compartment-ocid..."
# Name of the host this script runs on.
hostname = socket.gethostname()
metric_name = "disk_usage"
metric_namespace = "your-team" # e.g. a namespace for your team's custom metrics
# Load the SDK configuration from the default config file ~/.oci/config
config = oci.config.from_file()
# create monitoring service client
monitoring_client = \
oci.monitoring.MonitoringClient(config, service_endpoint="https://telemetry-ingestion. |
15 * * * * disk-usage.py --location IAD --partition /
15 * * * * disk-usage.py --location IAD --partition /boot |
#
# OCI metrics only support numeric values.
# Workaround for plugins that return a string: report a number instead.
#
def check_service_state(service_name):
    """Report the state of a systemd unit as a number.

    Runs ``systemctl is-active <service_name>`` and turns the command's exit
    status into a numeric value.

    Args:
        service_name: Unit name handed to ``systemctl is-active``.

    Returns:
        0 if the command exits with status 0, otherwise 1.
    """
    # Capture stdout so the state text printed by systemctl is not echoed.
    completed = subprocess.run(
        ['systemctl', 'is-active', service_name],
        stdout=subprocess.PIPE,
    )
    return 0 if completed.returncode == 0 else 1
|
15 * * * * service-state.py --location IAD --service mysqld |
#!/bin/python3
# Returns aggregated data from query.
#
# Queries the OCI Monitoring service for the hourly mean of the custom
# "disk_usage" metric of one partition between two dates and prints the
# response data.
#
# Usage:
#   summarize-disk-usage.py --partition / --start 2023-06-19 --end 2023-06-21
import argparse
from datetime import datetime, timezone

import oci


def utc_midnight(day):
    """Convert a yyyy-mm-dd string into a timezone-aware datetime (00:00 UTC).

    Used as an argparse ``type`` so that a malformed date is rejected with a
    usage error before any request is sent.

    Args:
        day: Date string in yyyy-mm-dd format.

    Returns:
        datetime at midnight UTC of the given day.

    Raises:
        argparse.ArgumentTypeError: if ``day`` is not a valid yyyy-mm-dd date.
    """
    try:
        parsed = datetime.strptime(day, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {day!r}, expected yyyy-mm-dd")
    return parsed.replace(tzinfo=timezone.utc)


parser = argparse.ArgumentParser(description="Disk usage metrics")
parser.add_argument("--partition", help="Partition", required=True)
parser.add_argument("--start", help="Start time for metric yyyy-mm-dd",
                    required=True, type=utc_midnight)
parser.add_argument("--end", help="End time for metric yyyy-mm-dd",
                    required=True, type=utc_midnight)
args = parser.parse_args()
partition = args.partition
start = args.start
end = args.end

# Use default config file ~/.oci/config
config = oci.config.from_file()
# Initialize service client with default config file
monitoring_client = oci.monitoring.MonitoringClient(config)

comp_id = "ocid1.compartment.oc1..your-id"
namespace = "your-namespace"
# Monitoring Query Language: hourly mean of disk_usage, filtered on the
# "partition" dimension. The doubled braces are literal braces in the f-string.
query = f'disk_usage[1h]{{partition = "{partition}"}}.mean()'

# start_time / end_time are passed as datetime objects, the type documented
# for SummarizeMetricsDataDetails, instead of hand-built timestamp strings.
summarize_metrics_data_response = monitoring_client.summarize_metrics_data(
    compartment_id=comp_id,
    summarize_metrics_data_details=oci.monitoring.models.SummarizeMetricsDataDetails(
        namespace=namespace,
        query=query,
        start_time=start,
        end_time=end))

# Get the data from response
print(summarize_metrics_data_response.data)
|
$ python3 summarize-disk-usage.py --partition / --start 2023-06-19 --end 2023-06-21
[{
"aggregated_datapoints": [
{
"timestamp": "2023-06-19T00:00:00+00:00",
"value": 17.4
},
-- shortened --
{
"timestamp": "2023-06-21T00:00:00+00:00",
"value": 17.0
}
],
"compartment_id": "ocid1.compartment.oc1...a",
"dimensions": {
"hostname": "myhostname.domain.com",
"partition": "/"
},
"metadata": {},
"name": "disk_usage",
"namespace": "your-namespace",
"resolution": null,
"resource_group": null
}]
|
---
# Look up the facts of one alarm, selected through its alarm_id.
- name: Get alarm mysqld-down
  oracle.oci.oci_monitoring_alarm_facts:
    alarm_id: 'your-alarm-ocid'
...
# expected result is like:
"alarms": [
{
"body": "Mysqld service is not online. ",
"display_name": "mysql-down",
"is_enabled": true,
"lifecycle_state": "ACTIVE",
"namespace": "your-custom-team-namespace",
"query": "service_state[1h]{service = \"mysqld\"}.mean() not in (0, 0)",
"severity": "CRITICAL",
"suppression": null,
}
|