#!/usr/bin/env python2
# -*- coding: UTF-8 -*-
################################################################################
import json
import os
import re
import subprocess
import sys
import time

import requests
from lxml import html
API="url_to_influxdb_api_with_auth_tokens"
# Set target URL to scrape
TARGET="http://www.meteo.physik.uni-muenchen.de/dokuwiki/doku.php?do=export_xhtmlbody&id=wetter:stadt:messung"
def flextract (data,min,max,fallback):
# Safely extract all numbers found in string as float
regx=re.findall(r"[-+]?[0-9]*\.?[0-9]+", data)
try:
WORK=float(regx[0])
if WORK <= max and \
WORK >= min and \
WORK != "999.9":
return WORK
else:
return fallback
except:
return fallback
################################################################################
## MAIN ######################################################################
################################################################################
def main():
# Define some sane starting points in case everything fails
OUTT=25.0
OUTH=60.0
OUTP=950.0
PREV=0.0
PRET=""
WSPD=0.0
WDIR=0
SRAD=0.0
DRAD=0.0
ED=0
while True:
if ED == 0:
try:
# Get the target's content
page = requests.get(TARGET)
# use lxml's html magic to structuture the data
tree = html.fromstring(page.text)
# gather all values found in elements
data = tree.xpath('//td/text()')
except:
pass
time.sleep (9.5)
ED=1
else:
ED=0
time.sleep (10)
# Air Temperature (2m) in degC
OUTT=flextract(data[2],-35,45,OUTT)
# Air Pressure in hPa
OUTP=flextract(data[26],0.0,1200.0,OUTP)
# Air Humidity (2m) in %
OUTH=flextract(data[8],0.0,100.0,OUTH)
# Precipitation Volume in mm
PREV=flextract(data[29],0.0,500.0,PREV)
# Precipitation Type
PRET=data[31]
PRET=PRET.encode('utf-8').strip()
# Windspeed in m/s
WSPD=flextract(data[10],0.0,100.0,WSPD)
# Wind Direction in deg
WDIR=int(flextract(data[28],0,360,WDIR))
# Global Solar Radiation (direct)
SRAD=flextract(data[21],0.0,1200.0,SRAD)
# Global Solar Radiation (diffuse)
DRAD=flextract(data[22],0.0,1200.0,DRAD)
# Give a 15% temp gain (based on OUTT) to PV modules (FIXME: come up with something better based on SRAD until sensors are in place for real values)
PV_T=OUTT + ((OUTT/100.0)*15.0)
# Odyssey UCSSPM Long-Term Evaluation
try:
proc = subprocess.Popen(['./ucsspm.py', '-lat', '48.11', '-lon', '11.11', '-at_t', str(OUTT), '-at_p', str(OUTP), '-at_h', str(OUTH), '-pv_t', str(PV_T)], stdout=subprocess.PIPE)
for line in proc.stdout.readlines():
output=line.rstrip()
ucsspmO=output.split('|')
except:
pass
# Aquarius UCSSPM Long-Term Evaluation
try:
proc = subprocess.Popen(['./ucsspm.py', '-lat', '48.11', '-lon', '11.11', '-at_t', str(OUTT), '-at_p', str(OUTP), '-at_h', str(OUTH), '-pv_t', str(PV_T), '-pv_a', '5.0', '-pv_tc', '0.29', '-pv_e', '19.4'], stdout=subprocess.PIPE)
for line in proc.stdout.readlines():
output=line.rstrip()
ucsspmA=output.split('|')
except:
pass
payload = []
payload.append('[{"name": "aquarius.env.outdoor.temp", "columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (OUTT,'°C'))
payload.append(' {"name": "aquarius.env.outdoor.baro", "columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (OUTP,'hPa'))
payload.append(' {"name": "aquarius.env.outdoor.hygro", "columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (OUTH,'%'))
payload.append(' {"name": "aquarius.env.outdoor.precip","columns": ["value", "type", "unit"], "points": [[%.1f,"%s","%s"]]},' % (PREV,PRET,'mm'))
payload.append(' {"name": "aquarius.env.outdoor.wind", "columns": ["value", "type", "unit"], "points": [[%d,"%s","%s"]]},' % (WDIR,'direction','°'))
payload.append(' {"name": "aquarius.env.outdoor.wind", "columns": ["value", "type", "unit"], "points": [[%.1f,"%s","%s"]]},' % (WSPD,'speed','m/s'))
payload.append(' {"name": "aquarius.env.outdoor.pyrano","columns": ["value", "type", "unit"], "points": [[%.1f,"%s","%s"]]},' % (SRAD,'direct','W/m²'))
payload.append(' {"name": "aquarius.env.outdoor.pyrano","columns": ["value", "type", "unit"], "points": [[%.1f,"%s","%s"]]},' % (DRAD,'diffuse','W/m²'))
payload.append(' {"name": "aquarius.ucsspm.etr","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmA[0]),'W/m²'))
payload.append(' {"name": "aquarius.ucsspm.rso","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmA[1]),'W/m²'))
payload.append(' {"name": "aquarius.ucsspm.sza","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmA[2]),'°'))
payload.append(' {"name": "aquarius.ucsspm.max","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmA[3]),'W/m²'))
payload.append(' {"name": "aquarius.ucsspm.out","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmA[4]),'W/m²'))
payload.append(' {"name": "odyssey.ucsspm.etr","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmO[0]),'W/m²'))
payload.append(' {"name": "odyssey.ucsspm.rso","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmO[1]),'W/m²'))
payload.append(' {"name": "odyssey.ucsspm.sza","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmO[2]),'°'))
payload.append(' {"name": "odyssey.ucsspm.max","columns": ["value", "unit"], "points": [[%.1f,"%s"]]},' % (float(ucsspmO[3]),'W/m²'))
payload.append(' {"name": "odyssey.ucsspm.out","columns": ["value", "unit"], "points": [[%.1f,"%s"]]}]' % (float(ucsspmO[4]),'W/m²'))
try:
requests.post(url=API,data=''.join(payload),timeout=2)
except:
pass
################################################################################
if __name__ == '__main__':
rc = main()
sys.exit (rc)
And that's that. Success. The only thing left to do, in order to close the circle again, was to share this knowledge, so that the next person looking for ways to scrape data from web pages with python can copy these examples, adapt them according to the new use case and fail and learn and come up with new ideas as well. Hopefully in even less time. And it also made it pretty obvious that the [[lab:ucsspm|UCSSPM]] code has to be refactored again, so that it can be included as a python lib in order to get rid of the system call and all the input/output piping :)
You can see the results of this robot's actions in the **[[https://apollo.open-resource.org/flight-control/vfcc/|Virtual Flight Control Center (VFCC)]]**
And of course it goes without saying that this also serves to show pretty well how important learning computer languages will become. We cannot create an army of slaves to do our bidding (for that is what all these machines/computers/systems like smartphones, IoT devices and automatons really are) if we don't know how to command them. Our current technological state is only possible because we already delegate an essential part of our workload to machines.
But how do we expect people to be able to tell all these machines what and how exactly they're supposed to do something (training a new slave/servant) if we're not willing to speak their language? It will still take some time until we've reached a state where we have more generalized systems or the first beginnings of real (buzzword alert) artificial intelligence. Until then it's just people programming states and reactions to these states in a smart and creative fashion, but we still have to do it their way. So why still force people to involuntarily learn dead stuff like Latin or French when the future for all of us lies in computers & programming languages?
{{tag>software ucsspm python data scraping metrics influxdb vfcc research}}
{{keywords>Apollo-NG apollo next generation hackerspace hacker space development makerspace fablab diy community open-resource open resource mobile hackbus software solar energy prediction ucsspm algorithm python research}}
~~DISCUSSION~~ |