Data Archive
There is a data archive available at https://archive.pqopen.com for downloading raw measurement data.
Today, daily files are provided for each measurement location, containing either Frequency, or Frequency and Voltage Magnitude, with cycle-by-cycle resolution. Each timestamp marks the zero-crossing of the voltage waveform.
The data files are in Apache Parquet Format and can be read easily with different programming languages.
Python Example
This example demonstrates loading measurement files from one location, concatenating multiple days, caching the data locally for repeated use, and returning a Polars data frame.
import datetime
import logging
import re
from pathlib import Path
import polars as pl
import requests
# Configuration
# Base URL under which the per-location daily parquet files are published.
ARCHIVE_BASE_URL = "https://archive.pqopen.com/daily/"
# Show INFO-level messages so download progress is visible when run as a script.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def clean_location_name(location: str) -> str:
    """Return *location* with every character outside [a-zA-Z0-9_] replaced by '_'.

    This makes the location usable as a directory name and URL path segment.
    """
    sanitized = [
        ch if (ch.isascii() and (ch.isalnum() or ch == '_')) else '_'
        for ch in location
    ]
    return ''.join(sanitized)
def get_archive_data(
    location: str,
    start_date: datetime.date,
    end_date: datetime.date,
    base_path: Path = Path("data"),
    channels: list[str] | None = None
) -> pl.DataFrame:
    """Load daily parquet files from the archive (or the local cache) as one DataFrame.

    Files already present under ``base_path`` are reused; missing days are
    downloaded from the archive server and cached for subsequent calls.

    Args:
        location: Measurement location, e.g. ``"DE/Stade"``. Special characters
            are replaced for the cache directory and URL path.
        start_date: First day to load (inclusive).
        end_date: Last day to load (inclusive).
        base_path: Root directory of the local file cache.
        channels: Channel names to select; defaults to ``["Freq"]``.

    Returns:
        Polars DataFrame with the ``_time`` column plus the requested channels,
        concatenated over all days that could be loaded.

    Raises:
        ValueError: If no file could be loaded for the entire date range.
    """
    # Avoid a mutable default argument: a literal list default would be shared
    # across calls and could be mutated by one caller for all others.
    if channels is None:
        channels = ["Freq"]
    clean_loc = clean_location_name(location)
    target_dir = base_path / clean_loc
    target_dir.mkdir(parents=True, exist_ok=True)
    data_frames = []
    # Iterate over the date range one day at a time.
    current_date = start_date
    while current_date <= end_date:
        file_name = f"{current_date.strftime('%Y-%m-%d')}_{','.join(channels)}.parquet"
        local_path = target_dir / file_name
        if not local_path.exists():
            url = f"{ARCHIVE_BASE_URL}{clean_loc}/{file_name}"
            logging.info("Downloading %s...", file_name)
            # A timeout keeps the loop from hanging forever on an unresponsive
            # server; a connection failure skips this day instead of crashing.
            try:
                response = requests.get(url, timeout=30)
            except requests.RequestException as exc:
                logging.warning("Could not download %s: %s", file_name, exc)
                current_date += datetime.timedelta(days=1)
                continue
            if response.status_code == 200:
                local_path.write_bytes(response.content)
            else:
                logging.warning("Could not download %s: HTTP %s", file_name, response.status_code)
                current_date += datetime.timedelta(days=1)
                continue
        # Load file and harmonize the timestamp resolution so frames from
        # different days can be concatenated without a schema mismatch.
        df = pl.read_parquet(local_path).with_columns(
            pl.col("_time").dt.cast_time_unit("us")
        ).select(["_time"] + channels)
        data_frames.append(df)
        current_date += datetime.timedelta(days=1)
    if not data_frames:
        raise ValueError("No Data in Time span.")
    return pl.concat(data_frames)
if __name__ == "__main__":
    # Demo: fetch three days of frequency data for one location
    # and print the first rows of the resulting frame.
    frame = get_archive_data(
        location="DE/Stade",
        start_date=datetime.date(2026, 1, 12),
        end_date=datetime.date(2026, 1, 14),
    )
    print(frame.head())