REMOTE_FILE

The REMOTE_FILE node loads a remote file using an HTTP URL and converts it to a DataContainer class.Note: If both the file_url and default are not specified when file_type="Image", a default image will be loaded. For now, REMOTE_FILE only supports HTTP file URLs, in particular GCP URL (starting with gcp://). S3 URL (starting with s3://) and other bucket-like URLs are not supported. If the file url is not specified and the default input is not connected, or if the file url is not a valid URL, a ValueError is raised.Params:file_url : strURL of the file to be loaded.default : Optional[TextBlob]If this input node is connected, the file URL will be taken from the output of the connected node. To be used in conjunction with batch processing.file_type : strType of file to load, default = image.Returns:out : Image|DataFrameImage for file_type 'image'. DataFrame for file_type 'json', 'csv', 'excel', 'xml'.

Python Code

from urllib.parse import urlparse

from flojoy import flojoy, Image, DataFrame, Grayscale, TextBlob
from typing import Literal, Optional
import numpy as np
from PIL import Image as PIL_Image
import pandas as pd

import requests
from io import BytesIO


def get_file_url(url: str):
    parse_result = urlparse(url)
    if not all([parse_result.scheme, parse_result.netloc]):
        raise ValueError(f"'{url}' is not a valid URL.")
    return url


# TODO: Consolidate remote files (S3, HTTP, etc...) and perhaps even merge REMOTE_FILE AND LOCAL_FILE in a single node.
@flojoy(
    deps={
        "xlrd": "2.0.1",
        "lxml": "4.9.2",
        "openpyxl": "3.0.10",
        "scikit-image": "0.21.0",
    }
)
def REMOTE_FILE(
    file_url: str = None,
    default: Optional[TextBlob] = None,
    file_type: Literal["Image", "Grayscale", "JSON", "CSV", "Excel", "XML"] = "Image",
) -> Image | DataFrame:
    """The REMOTE_FILE node loads a remote file using an HTTP URL and converts it to a DataContainer class.

    Note: If both the file_url and default are not specified when file_type="Image", a default image will be loaded.

    For now, REMOTE_FILE only supports HTTP file URLs, in particular GCP URL (starting with gcp://). S3 URL (starting with s3://) and other bucket-like URLs are not supported.

    If the file url is not specified and the default input is not connected, or if the file url is not a valid URL, a ValueError is raised.

    Parameters
    ----------
    file_url : str
        URL of the file to be loaded.
    default : Optional[TextBlob]
        If this input node is connected, the file URL will be taken from
        the output of the connected node.
        To be used in conjunction with batch processing.
    file_type : str
        Type of file to load, default = image.

    Returns
    -------
    Image|DataFrame
        Image for file_type 'image'.
        DataFrame for file_type 'json', 'csv', 'excel', 'xml'.
    """

    file_url = default.text_blob if default else file_url
    file_url = "" if file_url is None else file_url

    match file_type:
        case "Image":
            file_url = get_file_url(file_url)
            response = requests.get(file_url)
            f = PIL_Image.open(BytesIO(response.content))
            img_array = np.array(f.convert("RGBA"))
            red_channel = img_array[:, :, 0]
            green_channel = img_array[:, :, 1]
            blue_channel = img_array[:, :, 2]
            if img_array.shape[2] == 4:
                alpha_channel = img_array[:, :, 3]
            else:
                alpha_channel = None
            return Image(
                r=red_channel,
                g=green_channel,
                b=blue_channel,
                a=alpha_channel,
            )
        case "Grayscale":
            import skimage.io

            file_url = get_file_url(file_url)
            return Grayscale(img=skimage.io.imread(file_url, as_gray=True))
        case "CSV":
            file_url = get_file_url(file_url)
            df = pd.read_csv(file_url)
            return DataFrame(df=df)
        case "JSON":
            file_url = get_file_url(file_url)
            df = pd.read_json(file_url)
            return DataFrame(df=df)
        case "XML":
            file_url = get_file_url(file_url)
            df = pd.read_xml(file_url)
            return DataFrame(df=df)
        case "Excel":
            file_url = get_file_url(file_url)
            df = pd.read_excel(file_url)
            return DataFrame(df=df)

Find this Flojoy Block on GitHub

Example

Having problem with this example app? Join our Discord community and we will help you out!

In this example, a REMOTE_FILE node loads Flojoy’s logo from the internet and plots using a IMAGE node.