Twitter data was crawled using a ready-made Python crawler and is nested up to the 6th level. The data schema was created in R. Many columns are repeated at the root level or within the nested levels.
root
|-- contributors: string (nullable = true)
|-- coordinates: string (nullable = true)
|-- created_at: string (nullable = true)
|-- entities: struct (nullable = true)
| |-- hashtags: array (nullable = true)
| | |-- element: string (containsNull = true)
| |-- media: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- additional_media_info: struct (nullable = true)
| | | | |-- description: string (nullable = true)
| | | | |-- embeddable: boolean (nullable = true)
| | | | |-- monetizable: boolean (nullable = true)
| | | | |-- title: string (nullable = true)
| | | |-- display_url: string (nullable = true)
| | | |-- expanded_url: string (nullable = true)
| | | |-- id: long (nullable = true)
| | | |-- id_str: string (nullable = true)
| | | |-- indices: array (nullable = true)
| | | | |-- element: long (containsNull = true)
| | | |-- media_url: string (nullable = true)
| | | |-- media_url_https: string (nullable = true)
| | | |-- sizes: struct (nullable = true)
| | | | |-- large: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | | |-- medium: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | | |-- small: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | | |-- thumb: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | |-- source_status_id: long (nullable = true)
| | | |-- source_status_id_str: string (nullable = true)
| | | |-- source_user_id: long (nullable = true)
| | | |-- source_user_id_str: string (nullable = true)
| | | |-- type: string (nullable = true)
| | | |-- url: string (nullable = true)
| |-- symbols: array (nullable = true)
| | |-- element: string (containsNull = true)
| |-- urls: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- display_url: string (nullable = true)
| | | |-- expanded_url: string (nullable = true)
| | | |-- indices: array (nullable = true)
| | | | |-- element: long (containsNull = true)
| | | |-- url: string (nullable = true)
| |-- user_mentions: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- id: long (nullable = true)
| | | |-- id_str: string (nullable = true)
| | | |-- indices: array (nullable = true)
| | | | |-- element: long (containsNull = true)
| | | |-- name: string (nullable = true)
| | | |-- screen_name: string (nullable = true)
|-- extended_entities: struct (nullable = true)
| |-- media: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- additional_media_info: struct (nullable = true)
| | | | |-- description: string (nullable = true)
| | | | |-- embeddable: boolean (nullable = true)
| | | | |-- monetizable: boolean (nullable = true)
| | | | |-- title: string (nullable = true)
| | | |-- display_url: string (nullable = true)
| | | |-- expanded_url: string (nullable = true)
| | | |-- id: long (nullable = true)
| | | |-- id_str: string (nullable = true)
| | | |-- indices: array (nullable = true)
| | | | |-- element: long (containsNull = true)
| | | |-- media_url: string (nullable = true)
| | | |-- media_url_https: string (nullable = true)
| | | |-- sizes: struct (nullable = true)
| | | | |-- large: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | | |-- medium: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | | |-- small: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | | |-- thumb: struct (nullable = true)
| | | | | |-- h: long (nullable = true)
| | | | | |-- resize: string (nullable = true)
| | | | | |-- w: long (nullable = true)
| | | |-- source_status_id: long (nullable = true)
| | | |-- source_status_id_str: string (nullable = true)
| | | |-- source_user_id: long (nullable = true)
| | | |-- source_user_id_str: string (nullable = true)
| | | |-- type: string (nullable = true)
| | | |-- url: string (nullable = true)
| | | |-- video_info: struct (nullable = true)
| | | | |-- aspect_ratio: array (nullable = true)
| | | | | |-- element: long (containsNull = true)
| | | | |-- duration_millis: long (nullable = true)
| | | | |-- variants: array (nullable = true)
| | | | | |-- element: struct (containsNull = true)
| | | | | | |-- bitrate: long (nullable = true)
| | | | | | |-- content_type: string (nullable = true)
| | | | | | |-- url: string (nullable = true)
|-- extended_tweet: struct (nullable = true)
| |-- display_text_range: array (nullable = true)
| | |-- element: long (containsNull = true)
| |-- entities: struct (nullable = true)
| | |-- hashtags: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- indices: array (nullable = true)
| | | | | |-- element: long (containsNull = true)
| | | | |-- text: string (nullable = true)
| | |-- symbols: array (nullable = true)
| | | |-- element: string (containsNull = true)
| | |-- urls: array (nullable = true)
| | | |-- element: string (containsNull = true)
| | |-- user_mentions: array (nullable = true)
| | | |-- element: string (containsNull = true)
| |-- full_text: string (nullable = true)
|-- favorite_count: long (nullable = true)
|-- favorited: boolean (nullable = true)
|-- filter_level: string (nullable = true)
|-- geo: string (nullable = true)
|-- id: long (nullable = true)
|-- id_str: string (nullable = true)
|-- in_reply_to_screen_name: string (nullable = true)
|-- in_reply_to_status_id: string (nullable = true)
|-- in_reply_to_status_id_str: string (nullable = true)
|-- in_reply_to_user_id: string (nullable = true)
|-- in_reply_to_user_id_str: string (nullable = true)
|-- is_quote_status: boolean (nullable = true)
|-- lang: string (nullable = true)
|-- place: string (nullable = true)
|-- possibly_sensitive: boolean (nullable = true)
|-- quote_count: long (nullable = true)
|-- reply_count: long (nullable = true)
|-- retweet_count: long (nullable = true)
|-- retweeted: boolean (nullable = true)
|-- retweeted_status: struct (nullable = true)
| |-- contributors: string (nullable = true)
| |-- coordinates: string (nullable = true)
| |-- created_at: string (nullable = true)
| |-- display_text_range: array (nullable = true)
| | |-- element: long (containsNull = true)
| |-- entities: struct (nullable = true)
| | |-- hashtags: array (nullable = true)
| | | |-- element: string (containsNull = true)
| | |-- media: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- additional_media_info: struct (nullable = true)
| | | | | |-- description: string (nullable = true)
| | | | | |-- embeddable: boolean (nullable = true)
| | | | | |-- monetizable: boolean (nullable = true)
| | | | | |-- title: string (nullable = true)
| | | | |-- display_url: string (nullable = true)
| | | | |-- expanded_url: string (nullable = true)
| | | | |-- id: long (nullable = true)
| | | | |-- id_str: string (nullable = true)
| | | | |-- indices: array (nullable = true)
| | | | | |-- element: long (containsNull = true)
| | | | |-- media_url: string (nullable = true)
| | | | |-- media_url_https: string (nullable = true)
| | | | |-- sizes: struct (nullable = true)
| | | | | |-- large: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | | |-- medium: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | | |-- small: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | | |-- thumb: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | |-- type: string (nullable = true)
| | | | |-- url: string (nullable = true)
| | |-- symbols: array (nullable = true)
| | | |-- element: string (containsNull = true)
| | |-- urls: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- display_url: string (nullable = true)
| | | | |-- expanded_url: string (nullable = true)
| | | | |-- indices: array (nullable = true)
| | | | | |-- element: long (containsNull = true)
| | | | |-- url: string (nullable = true)
| | |-- user_mentions: array (nullable = true)
| | | |-- element: string (containsNull = true)
| |-- extended_entities: struct (nullable = true)
| | |-- media: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- additional_media_info: struct (nullable = true)
| | | | | |-- description: string (nullable = true)
| | | | | |-- embeddable: boolean (nullable = true)
| | | | | |-- monetizable: boolean (nullable = true)
| | | | | |-- title: string (nullable = true)
| | | | |-- display_url: string (nullable = true)
| | | | |-- expanded_url: string (nullable = true)
| | | | |-- id: long (nullable = true)
| | | | |-- id_str: string (nullable = true)
| | | | |-- indices: array (nullable = true)
| | | | | |-- element: long (containsNull = true)
| | | | |-- media_url: string (nullable = true)
| | | | |-- media_url_https: string (nullable = true)
| | | | |-- sizes: struct (nullable = true)
| | | | | |-- large: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | | |-- medium: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | | |-- small: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | | |-- thumb: struct (nullable = true)
| | | | | | |-- h: long (nullable = true)
| | | | | | |-- resize: string (nullable = true)
| | | | | | |-- w: long (nullable = true)
| | | | |-- type: string (nullable = true)
| | | | |-- url: string (nullable = true)
| | | | |-- video_info: struct (nullable = true)
| | | | | |-- aspect_ratio: array (nullable = true)
| | | | | | |-- element: long (containsNull = true)
| | | | | |-- duration_millis: long (nullable = true)
| | | | | |-- variants: array (nullable = true)
| | | | | | |-- element: struct (containsNull = true)
| | | | | | | |-- bitrate: long (nullable = true)
| | | | | | | |-- content_type: string (nullable = true)
| | | | | | | |-- url: string (nullable = true)
| |-- favorite_count: long (nullable = true)
| |-- favorited: boolean (nullable = true)
| |-- filter_level: string (nullable = true)
| |-- geo: string (nullable = true)
| |-- id: long (nullable = true)
| |-- id_str: string (nullable = true)
| |-- in_reply_to_screen_name: string (nullable = true)
| |-- in_reply_to_status_id: string (nullable = true)
| |-- in_reply_to_status_id_str: string (nullable = true)
| |-- in_reply_to_user_id: string (nullable = true)
| |-- in_reply_to_user_id_str: string (nullable = true)
| |-- is_quote_status: boolean (nullable = true)
| |-- lang: string (nullable = true)
| |-- place: string (nullable = true)
| |-- possibly_sensitive: boolean (nullable = true)
| |-- quote_count: long (nullable = true)
| |-- reply_count: long (nullable = true)
| |-- retweet_count: long (nullable = true)
| |-- retweeted: boolean (nullable = true)
| |-- source: string (nullable = true)
| |-- text: string (nullable = true)
| |-- truncated: boolean (nullable = true)
| |-- user: struct (nullable = true)
| | |-- contributors_enabled: boolean (nullable = true)
| | |-- created_at: string (nullable = true)
| | |-- default_profile: boolean (nullable = true)
| | |-- default_profile_image: boolean (nullable = true)
| | |-- description: string (nullable = true)
| | |-- favourites_count: long (nullable = true)
| | |-- follow_request_sent: string (nullable = true)
| | |-- followers_count: long (nullable = true)
| | |-- following: string (nullable = true)
| | |-- friends_count: long (nullable = true)
| | |-- geo_enabled: boolean (nullable = true)
| | |-- id: long (nullable = true)
| | |-- id_str: string (nullable = true)
| | |-- is_translator: boolean (nullable = true)
| | |-- lang: string (nullable = true)
| | |-- listed_count: long (nullable = true)
| | |-- location: string (nullable = true)
| | |-- name: string (nullable = true)
| | |-- notifications: string (nullable = true)
| | |-- profile_background_color: string (nullable = true)
| | |-- profile_background_image_url: string (nullable = true)
| | |-- profile_background_image_url_https: string (nullable = true)
| | |-- profile_background_tile: boolean (nullable = true)
| | |-- profile_banner_url: string (nullable = true)
| | |-- profile_image_url: string (nullable = true)
| | |-- profile_image_url_https: string (nullable = true)
| | |-- profile_link_color: string (nullable = true)
| | |-- profile_sidebar_border_color: string (nullable = true)
| | |-- profile_sidebar_fill_color: string (nullable = true)
| | |-- profile_text_color: string (nullable = true)
| | |-- profile_use_background_image: boolean (nullable = true)
| | |-- protected: boolean (nullable = true)
| | |-- screen_name: string (nullable = true)
| | |-- statuses_count: long (nullable = true)
| | |-- time_zone: string (nullable = true)
| | |-- translator_type: string (nullable = true)
| | |-- url: string (nullable = true)
| | |-- utc_offset: string (nullable = true)
| | |-- verified: boolean (nullable = true)
|-- source: string (nullable = true)
|-- text: string (nullable = true)
|-- timestamp_ms: string (nullable = true)
|-- truncated: boolean (nullable = true)
|-- user: struct (nullable = true)
| |-- created_at: string (nullable = true)
| |-- default_profile: boolean (nullable = true)
| |-- default_profile_image: boolean (nullable = true)
| |-- description: string (nullable = true)
| |-- favourites_count: long (nullable = true)
| |-- follow_request_sent: string (nullable = true)
| |-- followers_count: long (nullable = true)
| |-- following: string (nullable = true)
| |-- friends_count: long (nullable = true)
| |-- geo_enabled: boolean (nullable = true)
| |-- id: long (nullable = true)
| |-- id_str: string (nullable = true)
| |-- is_translator: boolean (nullable = true)
| |-- lang: string (nullable = true)
| |-- listed_count: long (nullable = true)
| |-- location: string (nullable = true)
| |-- name: string (nullable = true)
| |-- notifications: string (nullable = true)
| |-- profile_background_color: string (nullable = true)
| |-- profile_background_image_url: string (nullable = true)
| |-- profile_background_image_url_https: string (nullable = true)
| |-- profile_background_tile: boolean (nullable = true)
| |-- profile_banner_url: string (nullable = true)
| |-- profile_image_url: string (nullable = true)
| |-- profile_image_url_https: string (nullable = true)
| |-- profile_link_color: string (nullable = true)
| |-- profile_sidebar_border_color: string (nullable = true)
| |-- profile_sidebar_fill_color: string (nullable = true)
| |-- profile_text_color: string (nullable = true)
| |-- profile_use_background_image: boolean (nullable = true)
| |-- protected: boolean (nullable = true)
| |-- screen_name: string (nullable = true)
| |-- statuses_count: long (nullable = true)
| |-- time_zone: string (nullable = true)
| |-- translator_type: string (nullable = true)
| |-- url: string (nullable = true)
| |-- utc_offset: string (nullable = true)
| |-- verified: boolean (nullable = true)
I cannot understand the meaning of most of the columns. Please explain them with reference to the root level, because the columns in the deeper levels are repeated. If possible, explain them in a short but clear way.
Note: A sample record from the database could have been added here for reference, but this question's content already exceeds the character limit, so I could not include the record.
from What is the meaning of these columns in database crawled from Twitter?
No comments:
Post a Comment