|
| 1 | +import os |
| 2 | +import logging |
| 3 | +import pandas as pd |
| 4 | +import numpy as np |
| 5 | +import tempfile |
| 6 | +from typing import List |
| 7 | +import google.oauth2.service_account |
| 8 | +from google.cloud import storage |
| 9 | +from cloudforet.cost_analysis.error import * |
| 10 | +from spaceone.core.connector import BaseConnector |
| 11 | + |
# Maximum number of cost rows yielded per page by get_cost_data().
_PAGE_SIZE = 1000

# Project-wide logger name used across the spaceone plugins (not __name__).
_LOGGER = logging.getLogger("spaceone")
| 15 | + |
| 16 | + |
class GoogleStorageConnector(BaseConnector):
    """Connector that downloads cost CSV files from a Google Cloud Storage bucket.

    Credentials come from ``secret_data`` (a service-account info dict); the
    bucket to read is taken from ``secret_data["bucket_name"]``.
    """

    google_client_service = "storage"
    version = "v1"

    def __init__(self, *args, **kwargs):
        """Build a GCS client from the service-account info in ``kwargs["secret_data"]``.

        Expects ``secret_data`` to contain at least ``project_id`` (and, for
        ``get_cost_data``, ``bucket_name``).
        """
        super().__init__(*args, **kwargs)

        self.secret_data = kwargs.get("secret_data")
        self.project_id = self.secret_data.get("project_id")
        self.credentials = (
            google.oauth2.service_account.Credentials.from_service_account_info(
                self.secret_data
            )
        )
        # Reuse self.project_id instead of re-indexing secret_data — same
        # value, one consistent access path.
        self.client = storage.Client(
            project=self.project_id, credentials=self.credentials
        )

    def get_cost_data(self, base_url):
        """Yield pages of cost records from every CSV object in the bucket.

        Args:
            base_url: logged for traceability only; the bucket name comes
                from ``secret_data["bucket_name"]``.

        Yields:
            List[dict]: pages of at most ``_PAGE_SIZE`` cost rows each.
                Empty CSV files yield no pages.
        """
        _LOGGER.debug(f"[get_cost_data] base url: {base_url}")

        bucket_name = self.secret_data.get("bucket_name")
        bucket = self.client.get_bucket(bucket_name)
        tmpdir = tempfile.gettempdir()

        # Iterate the listed blobs directly instead of re-fetching each one
        # with get_blob() — the original made two API calls per object.
        for blob in bucket.list_blobs():
            # GCS object names may contain "/" (pseudo-directories); make
            # sure the local parent directory exists before downloading.
            csv_file_path = os.path.join(tmpdir, blob.name)
            os.makedirs(os.path.dirname(csv_file_path) or tmpdir, exist_ok=True)

            blob.download_to_filename(csv_file_path)
            try:
                costs_data = self._get_csv(csv_file_path)
            finally:
                # Don't leave downloaded CSVs accumulating in the temp dir.
                os.remove(csv_file_path)

            _LOGGER.debug(f"[get_cost_data] costs count: {len(costs_data)}")

            # Paginate. range(0, n, step) never produces a trailing empty
            # page — the original `int(n / PAGE) + 1` yielded an extra empty
            # page whenever n was an exact multiple of _PAGE_SIZE, and one
            # empty page for n == 0.
            for offset in range(0, len(costs_data), _PAGE_SIZE):
                yield costs_data[offset : offset + _PAGE_SIZE]

    @staticmethod
    def _check_options(options: dict) -> None:
        """Raise ERROR_REQUIRED_PARAMETER if ``options`` lacks ``base_url``."""
        if "base_url" not in options:
            raise ERROR_REQUIRED_PARAMETER(key="options.base_url")

    @staticmethod
    def _get_csv(csv_file: str) -> List[dict]:
        """Load a cost CSV into a list of row dicts, mapping NaN to None.

        Raises:
            Exception: re-raised after logging if the file cannot be parsed.
        """
        try:
            df = pd.read_csv(csv_file, encoding="utf-8-sig")
            df = df.replace({np.nan: None})

            return df.to_dict("records")

        except Exception as e:
            _LOGGER.error(f"[_get_csv] download error: {e}", exc_info=True)
            raise e
0 commit comments