I am trying to save data from yfinance to mongodb in a Timeseries db that I created following the instructions in the documentation. Is there an easy way of converting yfinance data to BSON UTC format? I need to write and read datetimes. I work with pymongo and pandas dataframes.
@Yannis_Antypas I just remembered that a while ago I published a complete script that handles the date.
It converts a mariadb table to mongodb and does the date thing as part of that. Hope this helps.
I wrote some code - its not the most efficient - but it works.
import pymongo
import yfinance as yf
import pandas as pd
from pymongo import MongoClient
import pytz
import datetime
from datetime import datetime, timezone
# Connect to the database
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["stocks"]
# Get the "OHLCV" collection
ohlc_collection = db["OHLCV"]
# List of ticker symbols
ticker_symbols = ["AAPL","GOOG"]
# Create an empty list to store the data for each ticker
ticker_data_list = []
# Loop through the ticker symbols
for ticker in ticker_symbols:
try:
# Retrieve the stock data for the current ticker
ticker_data = yf.Ticker(ticker).history(period="1y")
# Add a column with the ticker symbol to the data
ticker_data.insert(0, "Ticker", ticker)
# Append the data to the list
ticker_data_list.append(ticker_data)
except Exception as e:
print(f"An error occurred while retrieving data for {ticker}: {e}")
# Concatenate the data for each ticker into a single DataFrame
df = pd.concat(ticker_data_list).reset_index()
# Insert the 'timestamp' column in the first position
df.insert(0, 'timestamp', None)
# Iterate over the rows of the DataFrame
for index, row in df.iterrows():
# Parse the timestamp string
timestamp = row['Date']
timestamp_str = str(timestamp)
dt = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S%z')
# Convert the timestamp to UTC
dt_utc = dt.astimezone(timezone.utc)
# Use the strftime method to format the datetime object as a string in the desired format
formatted_string = dt_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
dt = datetime.strptime(formatted_string, '%Y-%m-%dT%H:%M:%SZ')
# Store the formatted string in the new column
df.at[index, 'timestamp'] = dt
# Drop the 'Date' column
df = df.drop(columns=['Date'])
# Get the data as a list of dictionaries
data_dict = df.to_dict(orient="records")
try:
# Insert the data into the collection
result = ohlc_collection.insert_many(data_dict,
ordered=True)
# Print a message indicating the number of documents inserted and the current time
print(f"Inserted {len(result.inserted_ids)} documents into the collection")
print(f"last modified {datetime.utcnow()}")
except Exception as e:
print(f"An error occurred while inserting data into the collection: {e}")
finally:
# Close the connection to the database
client.close()