Saturday, 11 August 2018

Working with async module in Node.js - Part 2 (async.eachSeries)

Background

This is a continuation of my previous post on async module in Node.js -
In the last post, we saw how neatly we can write code using async.waterfall. In this post, I will show you a similar trick with async.eachSeries method. 


Without async

Let's consider the following scenario.

We get a list of event types and we need to process them in order, one after another. Following is a sample code to do that -

/**
 * Program to demonstrate the async Node.js module
 * @author : athakur
 */

const async = require("async");

var startDemo = function () {
    console.log("Starting Demo");
    var events = ["Download", "Process", "Upload", "Del"];
    // Fire off each event; the callbacks arrive whenever each one finishes
    events.forEach(event => {
        process(event, function () {
            console.log("Got callback for : " + event);
        });
    });
    console.log("Ending Demo");
}

// Simulates an async operation whose duration depends on the event type
var process = function (processType, callback) {
    var processTime = 0;
    switch (processType) {
        case "Download":
            processTime = 2000;
            break;
        case "Process":
            processTime = 1000;
            break;
        case "Upload":
            processTime = 4000;
            break;
        case "Del":
            processTime = 100;
            break;
    }
    setTimeout(function () {
        console.log("Finished : " + processType);
        callback();
    }, processTime);
}

startDemo();


And the output is -



Wait, what happened here? We looped over our events array in order -

  1. Download
  2. Process
  3. Upload
  4. Delete
But that is clearly not what happened. This is because each process takes a different amount of time to finish. In a real-world scenario, these would be API calls or disk I/O, whose durations we cannot predict. Let's see how async comes to our rescue.

Change the main code as follows -

var startDemo = function () {
    console.log("Starting Demo");
    var events = ["Download", "Process", "Upload", "Del"];
    async.eachSeries(events, function (event, callback) {
        process(event, callback);
    }, function(err){
        if(!err){
            console.log("Ending Demo");
        }
    });
}

and rerun the code.




Now you can see all of them executed in series, in the order we expect.



NOTE1: async.forEach (an alias of async.each) runs through the array in parallel, meaning it invokes the iterator function for every item immediately; only after all of them have called their callback (2nd argument) is the final callback (3rd argument) invoked.

NOTE2: async.eachSeries runs through the array in series, meaning it invokes the iterator function for one item at a time and waits for it to call its callback (2nd argument) before moving to the next item; when all items are done, the final callback (3rd argument) is invoked.
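
To see the difference side by side, here is a minimal sketch contrasting the two (assuming the async module is installed; the item names and random delays are made up purely for illustration):

const async = require("async");

const items = ["A", "B", "C"];

// Parallel: all iterators start immediately, so completion order is not guaranteed
async.each(items, function (item, cb) {
    setTimeout(function () {
        console.log("each finished: " + item);
        cb();
    }, Math.random() * 100);
}, function (err) {
    console.log("async.each done" + (err ? " with error: " + err : ""));
});

// Series: one iterator at a time, so items always finish in array order
async.eachSeries(items, function (item, cb) {
    setTimeout(function () {
        console.log("eachSeries finished: " + item);
        cb();
    }, Math.random() * 100);
}, function (err) {
    console.log("async.eachSeries done" + (err ? " with error: " + err : ""));
});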


Related Links


Working with async module in Node.js - Part 1 (async.waterfall)

Background

JavaScript, as we know, runs on a single thread, and to avoid blocking on operations like network calls or disk I/O we use asynchronous callbacks. This essentially means tasks run in the background and we get a callback when the operation is done. If you wish to understand more about how JavaScript works, please watch the video below -




So, as you must know by now, a lot of work happens asynchronously. Sometimes we need to order these operations to suit our business logic. Consider a simple example -

  1. Download an mp4 file from the server
  2. Convert it into a gif locally
  3. Upload the gif back to the server
  4. Delete the mp4 from the server

Now in this example, all 4 steps are asynchronous operations. Also, we cannot move to the next step until the previous one has finished.

The Callback way

We can use callbacks for this. Something like below -
/**
 * Program to demonstrate the async Node.js module
 * @author : athakur
 */

var startDemo = function () {
    console.log("Starting Demo");
    download(function () {
        process(function () {
            upload(function () {
                del(function () {
                    console.log("Ending Demo");
                })
            })
        })
    });
}

var download = function (callback) {
    console.log("Starting download");
    delay();
    console.log("Finishing download");
    callback();
}

var process = function (callback) {
    console.log("Starting process");
    delay();
    console.log("Finishing process");
    callback();
}

var upload = function (callback) {
    console.log("Starting upload");
    delay();
    console.log("Finishing upload");
    callback();
}

var del = function (callback) {
    console.log("Starting del");
    delay();
    console.log("Finishing del");
    callback();
}

// Busy-wait loop to crudely simulate a time-consuming operation
var delay = function () {
    var i, j;
    for (i = 0; i < 100000; i++) {
        for (j = 0; j < 10000; j++) {
            //do nothing
        }
    }
}

startDemo();
This prints the below output -




As you can see, cascading callbacks create a mess (the so-called callback hell). Also, if at any point there is an error, we need to pass it back through the callbacks, and each step would need an if-else check to handle it. Let us see how easy this becomes with the async module.


The async way




First, you need to install the async Node.js module. To do so, run the following command -

  • npm install async


Now using async our program becomes -

/**
 * Program to demonstrate the async Node.js module
 * @author : athakur
 */

const async = require("async");

var startDemo = function () {
    console.log("Starting Demo");
    async.waterfall([download,
        process,
        upload,
        del],
        function (err, data) {
            if(err) {
                console.log("There was an error in the demo : " + err);
            }else {
                console.log("Demo complete successfully");
            }
        });
}


NOTE:  I have not included the actual methods again to avoid repetition.

And the output is -



Notice how much cleaner our code has become. Async takes care of chaining the callbacks. It also provides a mechanism to pass data from one step to the next: if you call the callback with data, it will be available in the next step.

If we change our download and process methods slightly, like below -

var download = function (callback) {
    console.log("Starting download");
    delay();
    console.log("Finishing download");
    callback(null, "Downloaded file URL");
}

var process = function (data, callback) {
    console.log("Starting process");
    console.log("In process method. Data from download: " + data);
    delay();
    console.log("Finishing process");
    callback();
}


and re-execute, we will get -




Also, it provides a cleaner way of error handling. Let's say our download fails. Change the download method as below -

var download = function (callback) {
    console.log("Starting download");
    delay();
    console.log("Finishing download");
    callback("Error in download", "Downloaded file URL");
}


This essentially means our download failed - a non-null 1st argument in the callback is treated as an error. In this scenario, the next steps in the waterfall will not be executed, and the final callback that you provided at the end of the waterfall call is invoked with the error. On executing the above you will get -



That's all for the async module's waterfall. In the next post, I will show you how we can use the async module for looping over an array of data -

Related Links



Friday, 10 August 2018

How to make HTTP/HTTPS request in Node.js

Background

Many times you need to make an external API call from your Node.js application. A simple example would be calling an API gateway from your Node.js based Lambda in your AWS environment. In this post, I will show you two ways to do this -
  1. The standard http/https library
  2. The request library


Using the standard http/https library

Let's see how we can use the standard https library to make an API request.

To use the standard http or https library, you can simply import the module using -

const https = require("https");
const http = require("http");

Now you can use these to make your http or https calls. A sample is provided below -


/**
 * Node.js code to demonstrate https calls.
 * @author athakur
 */
const https = require("https");

var startDemo = function () {
    console.log("starting demo code");
    executeHttps(function (err, data) {
        if (err) {
            console.log("Error in running demo code");
        }
        else {
            console.log("Successfully ending demo code");
        }

    });
}


var executeHttps = function (callback) {
    var options = {
        hostname: "opensourceforgeeks.blogspot.com",
        port: 443,
        path: "/p/about-me.html",
        method: 'GET',
        headers: {
            'Content-Type': 'text/html'
        }
    };

    var req = https.request(options, function (res) {
        console.log("Status for API call : " + res.statusCode);
        console.log("Headers for API call : " + JSON.stringify(res.headers));
        res.setEncoding('utf8');

        var body = '';

        res.on('data', function (chunk) {
            body = body + chunk;
        });

        res.on('end', function () {
            console.log("Body for API call : " + body.length);
            if (res.statusCode != 200) {
                console.log("API call failed with response code " + res.statusCode);
                callback("API call failed with response code " + res.statusCode, null)
            } else {
                console.log("Got response : " + body.length);
                callback(null, body);
            }
        });
    });

    req.on('error', function (e) {
        console.log("problem with API call : " + e.message);
        callback(e, null);
    });

    req.end();
}


startDemo();


You can get this code on my Github gist as well - https://gist.github.com/aniket91/2f6e92a005eb2a62fcc1ddd39aac6dc2


To execute just run (Assuming your file name is test.js) -
  • node test.js


You can similarly do it for plain http as well. For http you need to -
  • use const http = require("http");
  • change the port to 80 in options
  • call http.request instead of https.request
NOTE: Notice how we are building the body in the 'data' event listener and then processing the response in the 'end' event. I have seen developers process the data in the 'data' event listener alone, which is not correct. It will break if your response is large and arrives in multiple chunks.

Similarly, you can execute a POST request. Change the options to the following (post_data here is the request body you intend to send) -

    var options = {
        hostname: "opensourceforgeeks.blogspot.com",
        port: 80,
        path: "/p/about-me.html",
        method: 'POST',
        headers: {
            'Content-Type': 'text/html',
            'Content-Length': Buffer.byteLength(post_data)
        }
    };



and then, before you close the request with req.end(), add -
  • req.write(post_data);
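
Putting it all together, a minimal sketch of such a POST call with the standard https module could look like the following (the hostname, path and post_data payload here are placeholders for illustration, not a real endpoint):

const https = require("https");

var post_data = JSON.stringify({ message: "hello" }); // hypothetical request body

var options = {
    hostname: "example.com",   // placeholder host
    port: 443,
    path: "/api/echo",         // placeholder path
    method: 'POST',
    headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(post_data)
    }
};

var req = https.request(options, function (res) {
    var body = '';
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
        body = body + chunk;   // accumulate the chunks
    });
    res.on('end', function () {
        console.log("Status : " + res.statusCode);
        console.log("Response body : " + body);
    });
});

req.on('error', function (e) {
    console.log("problem with API call : " + e.message);
});

req.write(post_data);          // write the request body
req.end();                     // finish the request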



Now that we have seen how the http/https modules work in Node.js, let's see how the request module works.

Using the request library

The request module is more user-friendly to use.


To begin with, you need to install the request module since it is not a standard library that comes with Node.js. To install it, execute the following command -
  • npm install request


You should see a folder called node_modules created in your directory, with the request module and its dependencies installed inside.

You can import request module using -
  • const request = require('request');

Then you can use it as follows -

/**
 * Node.js code to demonstrate https calls.
 * @author athakur
 */
const request = require('request');

var startDemo = function () {
    console.log("starting demo code");
    executeRequest(function (err, data) {
        if (err) {
            console.log("Error in running demo code");
        }
        else {
            console.log("Successfully ending demo code");
        }

    });
}


var executeRequest = function(callback){
    var headers = {};
    headers['Content-type'] = 'text/html';
    request({
        url: 'https://opensourceforgeeks.blogspot.com//p/about-me.html',
        method: 'GET',
        headers: headers
    }, function (err, response, body) {
        if (err) {
            console.error('API failed : ', err)
            callback(err)
        } else {
            console.log("Statuscode: " + response.statusCode);
            console.log("Got response : " + body.length);
            callback(null, body);

        }
    })
}



And the output is -


You can execute a POST call as well by changing the method type to POST and supplying a request body. For example (payload is the request body you want to send) -

    request({
        url: 'https://opensourceforgeeks.blogspot.com//p/about-me.html',
        method: 'POST',
        body: payload,
        headers: headers
    }, function (err, response, body) {
        if (err) {
            console.error('API failed : ', err)
            callback(err)
        } else {
            console.log("Statuscode: " + response.statusCode);
            console.log("Got response : " + body.length);
            callback(null, body);

        }
    });
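
Note that payload and headers in the snippet above are assumed to be defined by you; a minimal illustrative setup could be -

var payload = JSON.stringify({ message: "hello" });   // hypothetical request body

var headers = {
    'Content-Type': 'application/json',
    'Content-Length': Buffer.byteLength(payload)
};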




Hope this helps! Let me know if you have any questions. Thanks.




Related Links 




Monday, 16 July 2018

How to fix "Unable to find a region via the region provider chain" exception with AWS SDK

Background

Recently I was working on a task that needed to upload and download a test file on an AWS S3 bucket. For this, I used the AWS Java SDK. The functionality seemed to work fine until the code was deployed in production. In this post, I will try to explain why this exception occurs and how we can fix it.



Code that I had used for the test upload was as follows -

  try {
   BasicAWSCredentials awsCreds = new BasicAWSCredentials("YourAccessKeyId", "YourSecretKey");
   AmazonS3 s3client = AmazonS3ClientBuilder.standard()
                    .withCredentials(new AWSStaticCredentialsProvider(awsCreds))
                    .build();
   s3client.putObject(storage.getBucketName(), "test.txt", "Done!");

  } catch (AmazonServiceException ase) {
   logger.error(
     "Caught an AmazonServiceException, which means your request made it to Amazon S3, but was rejected with an error response for some reason. storage : {}",
     storage, ase);
   logger.error("Error Message:    {}", ase.getMessage());
   logger.error("HTTP Status Code: {}", ase.getStatusCode());
   logger.error("AWS Error Code:   {}", ase.getErrorCode());
   logger.error("Error Type:       {}", ase.getErrorType());
   logger.error("Request ID:       {}", ase.getRequestId());
  } catch (AmazonClientException ace) {
   logger.error(
     "Caught an AmazonClientException, which means the client encountered an internal error while trying to communicate with S3, such as not being able to access the network storage : {}",
     storage, ace);
   logger.error("Error Message: {}", ace.getMessage());
  } catch (Exception ex) {
   logger.error("Got exception while testing upload to S3", ex);
  }

I had tested this code by deploying it on one of our EC2 instances and it worked fine. I will explain a bit later why it worked on an EC2 instance, but there is a major flaw in the above code that eventually ends up throwing an exception - "Unable to find a region via the region provider chain"

Relevant Stacktrace:

com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
Must provide an explicit region in the builder or setup environment to supply a region.
at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:386)
at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:352)



The problem

The problem with the above piece of code is that we are not supplying the AmazonS3ClientBuilder with a region. Even though S3 is a global service, the buckets that you create are region specific. Each region has its own endpoint, and hence the AWS S3 SDK needs to know the region in order to know which endpoint it should make the API call to.

The Solution

The simplest solution is to pass the region to the AmazonS3ClientBuilder builder explicitly as follows -

AmazonS3 s3client = AmazonS3ClientBuilder.standard()
        .withCredentials(new AWSStaticCredentialsProvider(awsCreds))
        .withRegion(Regions.US_EAST_1)
        .build();


NOTE: After you build a client with the builder, it's immutable and the region cannot be changed. If you are working with multiple AWS Regions for the same service, you should create multiple clients—one per region.

Understanding the solution

The problem and the solution might appear simple but there are various aspects you need to understand about setting region in your S3 builder.

There are various ways your AWS SDK can infer the region to use instead of you explicitly providing it yourself.

NOTE: You must use client builders to have the SDK automatically detect the region your code is running in. This is not applicable if you are using a client constructor, in which case the default region from the SDK will be used.

If you don't explicitly set a region using the withRegion methods, the SDK consults the default region provider chain to try and determine the region to use.


  1. Any explicit region set by using withRegion or setRegion on the builder itself takes precedence over anything else.
  2. The AWS_REGION environment variable is checked. If it's set, that region is used to configure the client.
    1. NOTE: This environment variable is set by the Lambda container.
  3. The SDK checks the AWS shared configuration file (usually located at ~/.aws/config). If the region property is present, the SDK uses it.
    1. The AWS_CONFIG_FILE environment variable can be used to customize the location of the shared config file.
    2. The AWS_PROFILE environment variable or the aws.profile system property can be used to customize the profile that is loaded by the SDK.
  4. The SDK attempts to use the Amazon EC2 instance metadata service to determine the region of the currently running Amazon EC2 instance.

If the SDK still hasn't found a region by this point, client creation fails with an exception. And we saw what exception it throws :) It is the reason I am writing this post. Also, you must have realized why it worked for me on the EC2 instance: as per point number 4, the SDK attempts to use the Amazon EC2 instance metadata service to determine the region. Hope this helps!



Related Links



Wednesday, 4 July 2018

Android material design colors

Background

When you create an Android app from Android Studio, the IDE pre-creates certain files for you. This includes the default style for the application. It looks something like below -


<resources>
    <!-- Base application theme. -->
    <style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
        <!-- Customize your theme here. -->
        <item name="colorPrimary">@color/colorPrimary</item>
        <item name="colorPrimaryDark">@color/colorPrimaryDark</item>
        <item name="colorAccent">@color/colorAccent</item>
    </style>
</resources>

Now the question is, what are these -
  • colorPrimary,
  • colorPrimaryDark and
  • colorAccent
These are all based on Material Design guidelines. For me, the colors are as follows -

<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="colorPrimary">#3F51B5</color>
    <color name="colorPrimaryDark">#303F9F</color>
    <color name="colorAccent">#FF4081</color>
</resources>

But you can really choose any color theme you want. For more details, you can see the Google documentation -

In this post, I will list what each color name means.


Android material design colors

Following are the naming conventions used -

  • colorPrimary:  The color of the app bar.
  • colorPrimaryDark: The color of the status bar and contextual app bars; this is normally a dark version of colorPrimary.
  • colorAccent: The color of UI controls such as checkboxes, radio buttons, and edit text boxes.
  • windowBackground: The color of the screen background.
  • textColorPrimary: The color of UI text in the app bar.
  • statusBarColor: The color of the status bar.
  • navigationBarColor: The color of the navigation bar.


To visualize refer to the following picture -





You can create and preview your colors here -> https://www.materialpalette.com/


Related Links

Sunday, 1 July 2018

Understanding JSON Web Tokens (JWT)

Background

JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.


What this essentially means is that JWT helps you secure your application by allowing you to securely share claims, such as user data, between your application and the client it is communicating with. In this post, I will explain the fundamentals of JWT and how it works.






When would you use JWT?

As mentioned before, JWT can be used to secure communication between two parties that exchange data. Following are some common use cases -

  1. Authorization: This is a very common use case of JWT. When users log into an application, they are given a JWT. Each subsequent request that the user makes to this application must include this JWT. Your application validates each request to see if it carries a valid JWT before processing it.
  2. Information exchange: You can also transfer information in a JWT in a secure manner. This means a JWT can carry data as part of it. Note that this information is visible to anyone having access to the token. However, no one can alter it, since the token signature is calculated based on the information it holds and altering any data will change the signature, which would invalidate the token. We will see this in detail in some time.


What does JWT look like?

Now that we know when we can use a JWT, let's see what it looks like and what it comprises.

A JWT typically looks like -
  • xxxxx.yyyyy.zzzzz
As you can see it consists of 3 parts separated by a dot. These three parts are -
  1. Header
  2. Payload
  3. Signature 
Let's see what each of these sections represent.

Header: The header is a JSON object that mainly consists of two parts - the type of the token, which is JWT, and the signing (hashing) algorithm used. For example,


{
  "alg": "HS256",
  "typ": "JWT"
}

The header section of the JWT is the Base64 URL encoding of the above JSON.


Payload: This is the 2nd part of the JWT. This section contains the claims, i.e. the information that needs to be shared. This is again in JSON format. These claims can be of 3 types -


  1. Registered claims: These are predefined claims which are optional but useful to convey proper information. Examples are iss (issuer), exp (expiration time), sub (subject), etc. You can see all the registered claims here.
  2. Public claims: These can be defined by anyone using JWT but should be registered in the IANA JSON Web Token Registry to avoid collisions.
  3. Private claims: These are custom claims that can be added to share information between two parties as long as both agree on using them. These are neither registered nor public claims.
Example -

{
  "sub": "Welcome to my blog!",
  "name": "Aniket Thakur",
  "iss": "opensourceforgeeks.blogspot.com",
  "exp": 1530436998
}

The payload section of the JWT is the Base64 URL encoding of the above JSON.

Signature: This is the 3rd and last part of JWT. This represents the signature of the token. This is created as follows -

Signature = Hash (base64UrlEncode(header) + "." + base64UrlEncode(payload), secret)

As you can see, the signature is computed over the header as well as the payload. So if anyone changes the header or the payload, the signature will no longer match and hence JWT validation will fail. This is exactly why I mentioned earlier that a JWT is secure from tampering. Also, notice that a secret key is used to compute the hash. This secret is never shared with anyone (only the application generating the JWT to protect its resources has it). And finally, the hashing method depends on the algorithm you are using. In the above example we used HS256 (HMAC with SHA-256).
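
As a rough illustration, you can reproduce this in Node.js with the built-in crypto module. This is only a sketch of the idea (in practice you would typically use a library such as jsonwebtoken); the header, payload and "secret" key below match the example used in this post:

const crypto = require("crypto");

// Base64 URL encoding: standard Base64 with '=' stripped and '+', '/' replaced
function base64UrlEncode(input) {
    return Buffer.from(input)
        .toString("base64")
        .replace(/=/g, "")
        .replace(/\+/g, "-")
        .replace(/\//g, "_");
}

var header = { alg: "HS256", typ: "JWT" };
var payload = {
    sub: "Welcome to my blog!",
    name: "Aniket Thakur",
    iss: "opensourceforgeeks.blogspot.com",
    exp: 1530436998
};

var encodedHeader = base64UrlEncode(JSON.stringify(header));
var encodedPayload = base64UrlEncode(JSON.stringify(payload));

// HS256 signature = HMAC-SHA256 over "header.payload" using the secret key
var signature = crypto
    .createHmac("sha256", "secret")
    .update(encodedHeader + "." + encodedPayload)
    .digest("base64")
    .replace(/=/g, "")
    .replace(/\+/g, "-")
    .replace(/\//g, "_");

console.log(encodedHeader + "." + encodedPayload + "." + signature);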


So, assuming the secret key used is "secret", the JWT for the example above would look like -

eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJXZWxjb21lIHRvIG15IGJsb2chIiwibmFtZSI6IkFuaWtldCBUaGFrdXIiLCJpc3MiOiJvcGVuc291cmNlZm9yZ2Vla3MuYmxvZ3Nwb3QuY29tIiwiZXhwIjoxNTMwNDM2OTk4fQ.8pmcCeFS9gx8Yb-DPRkAihhW7mUxAZkklYHHme5a0tU


where

header : eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9

payload : eyJzdWIiOiJXZWxjb21lIHRvIG15IGJsb2chIiwibmFtZSI6IkFuaWtldCBUaGFrdXIiLCJpc3MiOiJvcGVuc291cmNlZm9yZ2Vla3MuYmxvZ3Nwb3QuY29tIiwiZXhwIjoxNTMwNDM2OTk4fQ

Signature : 8pmcCeFS9gx8Yb-DPRkAihhW7mUxAZkklYHHme5a0tU




You can also see this JWT in action - https://jwt.io/.

NOTE1: Notice for registered claims all claim names are 3 characters in length. This is because JWT is meant to be compact.

NOTE2: Also, you must have realized by now that the header and payload are just Base64 URL encoded values, and anyone with access to the token can decode and read them. So you must never put any sensitive information in these sections. JWT only ensures that the data cannot be tampered with; the data itself is visible to everyone with access to the token.

How does JWT work?

Now that we have seen the fundamentals of JWT, let's see how it is actually used to protect resources in an application.

For simplicity, let's assume we have our own server application which has an authentication module and a set of APIs that must be protected. First, the user or client authenticates itself against the server. The server sends a JWT in the response to a successful authentication call. The client then includes this token in all subsequent API calls made to the server, typically in a request header. For each API call, the server checks whether the JWT is valid and processes the request based on the validity of the token.

We already saw how JWTs are created. Now let's see how they are verified. Once the server receives a JWT, it extracts the header and payload sections. It then uses these parts, along with the secret that is stored only on the server, to recalculate the signature. If the calculated signature is the same as the one received in the JWT, the JWT is valid and further processing can happen. If the signatures do not match, it means someone has tampered with the token and the request must not be allowed to execute. You can send a 401 (unauthenticated) response in such cases.
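
A minimal verification sketch along the same lines (again illustrative only; a library like jsonwebtoken would also check the exp claim and other details for you):

const crypto = require("crypto");

// Returns true if the token's signature matches the one we compute with our secret
function isSignatureValid(token, secret) {
    var parts = token.split(".");
    if (parts.length !== 3) {
        return false; // not a well-formed JWT
    }
    var expected = crypto
        .createHmac("sha256", secret)
        .update(parts[0] + "." + parts[1])
        .digest("base64")
        .replace(/=/g, "")
        .replace(/\+/g, "-")
        .replace(/\//g, "_");
    return expected === parts[2];
}

// Usage: if this returns false, reject the request with a 401 response.
// Remember to also decode the payload and check the exp claim before trusting the token.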

Ideally, you would have a separate API server and a separate authentication server, in which case you can keep the secret in a centralized place, like a database, and read it from both servers to generate JWT signatures.

Flow is something like below -





Summary

  • JWT can be used to securely share claims between two parties. This can either be used for protecting your resources on the server or for sharing data securely.
  • JWT protects the data from being tampered with, but it does not prevent anyone from viewing that data (since the data is just a Base64 encoded string). So never send sensitive data as part of a JWT.
  • Always add an expiry to the JWT. That way, if your token is compromised for some reason, it will not be valid past its expiry. The client can reach out to the server for a new token in this case.
  • Always use HTTPS for sending JWTs, since it prevents unauthorized users from stealing them.
  • You can use https://jwt.io/ to generate and see JWT yourself.



Related Links



Saturday, 23 June 2018

DynamoDB read and write provisioned throughput calculations

Background

Amazon DynamoDB is a fast and flexible NoSQL database service for all applications that need consistent, single-digit millisecond latency at any scale. It is a fully managed cloud database and supports both document and key-value store models. Its flexible data model, reliable performance, and automatic scaling of throughput capacity make it a great fit for mobile, web, gaming, ad tech, IoT, and many other applications.

In this post, I will show you how to calculate the read and write provisioned throughput for DynamoDB. This is a very common type of question asked in the "AWS Certified Developer - Associate" exam. I will also show you some examples to fully understand the calculations.

AWS allows us to change the read and write capacity units of DynamoDB, which lets us scale the DB based on our requirements.



But the major question is how you come up with these capacity unit values, which is exactly what we are going to see below.

DynamoDB read and write provisioned throughput calculations


Before we head on to the calculation part, let's try to understand some details about read and write throughput in DynamoDB.

  1. Read provisioned throughput:
    • All reads are rounded up to increments of 4 KB
    • One read capacity unit gives you 2 eventually consistent reads per second (the default consistency)
    • One read capacity unit gives you 1 strongly consistent read per second
  2. Write provisioned throughput:
    • All writes are rounded up to increments of 1 KB
    • One write capacity unit gives you 1 write per second

Now let's see how we can compute read provisioned throughput -

DynamoDB read provisioned throughput calculation

  1. Find the read units required per item. For this, you need to round the item size up to the nearest chunk of 4 KB and then divide by 4. For example, if your item size is 6 KB, the nearest 4 KB chunk is 8 KB and the read units required are 8 / 4 = 2. Another example: if your item size is 1 KB, the nearest 4 KB chunk is 4 KB and the read units required are 4 / 4 = 1. Let's call this value X.
  2. Now you need to calculate the number of items read per second. For example, if you are reading 120 items per minute, then the number of items read per second is 120 / 60 = 2. Let's call this value Y.
  3. Your read capacity units for strongly consistent reads would be (X * Y).
  4. If you are using eventually consistent reads, divide the above number by 2 to get the read provisioned throughput, i.e. (X * Y) / 2. This is because one read unit gives 2 eventually consistent reads per second, so you need only half the capacity.
Let's take some examples to understand this better.


Q1. Let's say you have an application that needs to read 20 items of 2 KB each per second using eventually consistent reads. What read throughput value should be set?

A. Our item size is 2 KB. So let's first round it up to the nearest 4 KB chunk, which is 4 KB. Now, to get the read units per item we divide by 4, so 4 / 4 = 1. This is our X if you are following the method above. The number of items read per second is 20, which is our Y. So X * Y = 1 * 20 = 20. Finally, the reads are eventually consistent, which means we need to further divide the above value by 2. So the final read throughput is 20 / 2 = 10.

Let's see another example -

Q2. Let's say you have an application that needs to read 10 items of 10 KB each per second using eventually consistent reads. What read throughput value should be set?

A. Our item size is 10 KB. So let's first round it up to the nearest 4 KB chunk, which is 12 KB. Now, to get the read units per item we divide by 4, so 12 / 4 = 3. This is our X if you are following the method above. The number of items read per second is 10, which is our Y. So X * Y = 3 * 10 = 30. Finally, the reads are eventually consistent, which means we need to further divide the above value by 2. So the final read throughput is 30 / 2 = 15.


Now let's see an example with strong consistency.

Q3. Let's say you have an application that needs to read 5 items of 6 KB each per second using strongly consistent reads. What read throughput value should be set?

A. Our item size is 6 KB. So let's first round it up to the nearest 4 KB chunk, which is 8 KB. Now, to get the read units per item we divide by 4, so 8 / 4 = 2. This is our X if you are following the method above. The number of items read per second is 5, which is our Y. So X * Y = 2 * 5 = 10. Finally, since the reads are strongly consistent, you do not need to divide the result by 2. So the final read throughput is 10.
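
The read-side arithmetic above can be captured in a small helper. A minimal sketch (JavaScript, purely illustrative) that reproduces the three answers:

function readCapacityUnits(itemSizeKB, itemsPerSecond, stronglyConsistent) {
    // Round the item size up to the nearest 4 KB chunk, then divide by 4 -> X
    var unitsPerItem = Math.ceil(itemSizeKB / 4);
    var units = unitsPerItem * itemsPerSecond;        // X * Y
    // Eventually consistent reads need only half the capacity
    return stronglyConsistent ? units : units / 2;
}

console.log(readCapacityUnits(2, 20, false));  // 10 -> Q1
console.log(readCapacityUnits(10, 10, false)); // 15 -> Q2
console.log(readCapacityUnits(6, 5, true));    // 10 -> Q3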
 

DynamoDB write provisioned throughput calculation

  1. Find the write units required per item. Since each write unit covers 1 KB, you can directly use the item size in KB (rounded up to a whole KB) as the write units per item. For example, if your item size is 6 KB, the write units required are 6. Another example: if your item size is 12 KB, the write units required are 12. Let's call this value X.
  2. Now you need to calculate the number of items written per second. For example, if you are writing 120 items per minute, then the number of items written per second is 120 / 60 = 2. Let's call this value Y.
  3. Your write capacity units would be (X * Y). There is no notion of strongly consistent or eventually consistent writes.

Let's see some examples for this.

Q1. You have an application that writes 10 items per second, where each item is 11 KB in size. What should the write throughput be set to?
A. Since the item size is 11 KB and each write unit covers 1 KB, we need 11 write units per item. This is our X. We also know the application is writing 10 items per second to the DB, which is our Y value. So the write throughput is X * Y = 11 * 10 = 110.


Let's see another example -

Q2. You have an application that writes 100 items per second, where each item is 10 KB in size. What should the write throughput be set to?
A. Since the item size is 10 KB and each write unit covers 1 KB, we need 10 write units per item. This is our X. We also know the application is writing 100 items per second to the DB, which is our Y value. So the write throughput is X * Y = 10 * 100 = 1000.
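
Similarly, a minimal sketch of the write-side arithmetic:

function writeCapacityUnits(itemSizeKB, itemsPerSecond) {
    // Each write unit covers 1 KB, so round the item size up to a whole KB -> X
    var unitsPerItem = Math.ceil(itemSizeKB);
    return unitsPerItem * itemsPerSecond;             // X * Y
}

console.log(writeCapacityUnits(11, 10));   // 110 -> Q1
console.log(writeCapacityUnits(10, 100));  // 1000 -> Q2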


NOTE: Each item is nothing but a row in DynamoDB.


Related Links



Thursday, 21 June 2018

How to clean and manage "Recent Places" in Mac OS X

Background

Whenever you save a file on your Mac, OS X will remember the destination folder in a "Recent Places" category, which will be available the next time you save another file. This feature enables quick access to the folders you commonly use, which saves a lot of time.


But sometimes you need more granular control over this. For example, you may want to clear the recent places or limit the number of folders that are stored under this category. In this post, I am going to show you how to do exactly that.

How to clean and manage "Recent Places" in Mac OS X

Let's start with how we can limit the number of folders that are stored in the "Recent Places" category. For me, by default, it stores 3 folders. It may vary as per your OS version.
To increase the limit, execute the following command in your terminal -

Here NUMBER is the number of entries you want. Making it zero will disable the recent places list.




To remove this limit you can execute the following command -



And finally, if you want to clear the recent places list, you can execute the following command -



Tuesday, 12 June 2018

AWS service limits asked in "AWS Certified Solutions Architect - Associate" and "AWS Certified Developer - Associate" certifications

Background

I just cleared my "AWS Certified Developer - Associate" certification exam yesterday with 90%. I have already cleared "AWS Certified Solutions Architect - Associate" exam 6 months back with 89%. You can see my badges below-
While preparing, I realized that there are some questions based on service limits in AWS. These can be straightforward questions or they can be slightly twisted. In either case, knowing the service limits helps a lot. So I am going to summarize the ones I feel are most important from a certification perspective.




NOTE: AWS service limits can change anytime. So it is best to refer the FAQ sections of corresponding services to confirm. Following limits are as of June 2018.

AWS service limits & constraints

Following are AWS services and their corresponding limits. There are more limits and constraints for each service; I am simply trying to summarize them based on my exam preparation, test quizzes, and actual exam experience. Please let me know in the comments if these limits have changed and I will update accordingly. Thanks.

Consolidated billing


AWS S3

  • By default, customers can provision up to 100 buckets per AWS account. However, you can increase your Amazon S3 bucket limit by visiting AWS Service Limits.
  • The bucket name can be between 3 and 63 characters long and can contain only lower-case characters, numbers, periods, and dashes.
  • Bucket names must not be formatted as an IP address (for example, 192.168.5.4).
  • For more details refer - https://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
  • AWS S3 offers unlimited storage
  • Each object on S3, however, can be 0 bytes to 5TB.
  • The largest object that can be uploaded in a single PUT is 5GB
  • For objects larger than 100 megabytes, customers should consider using the Multipart Upload capability.
  • For further details refer - https://aws.amazon.com/s3/faqs/

Glacier

  • There is no maximum limit to the total amount of data that can be stored in Amazon Glacier. 
  • Individual archives are limited to a maximum size of 40 terabytes.
  • For more details refer - https://aws.amazon.com/glacier/faqs/

Redshift


AWS EC2

VPC

Route 53



Cloud watch

Cloud formation

Lambda

Dynamo DB

  • There is an initial limit of 256 tables per region. You can raise a request to increase this limit.
  • You can define a maximum of 5 local secondary indexes and 5 global secondary indexes per table(hard limit) - total 10 secondary indexes
  • The maximum size of item collection is 10GB
  • The minimum amount of reserved capacity that can be bought - 100
  • The maximum item size in DynamoDB is 400 KB, which includes both attribute name binary length (UTF-8 length) and attribute value lengths (again binary length). The attribute name counts towards the size limit. No limit on the number of items.
  • For more details refer - https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
  • A BatchGetItem single operation can retrieve up to 16 MB of data, which can contain as many as 100 items
  • For more details refer - https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchGetItem.html
  • A single Scan operation will read up to the maximum number of items set (if using the Limit parameter) or a maximum of 1 MB of data and then apply any filtering to the results using FilterExpression.
  • For more details refer - https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_Scan.html

SQS

  • You can create any number of message queues.
  • Max configuration: 14 days retention and 12 hours visibility timeout
  • Default configuration: 4 days retention  and 30 seconds visibility timeout
  • A single request can contain 1 to 10 messages, up to a maximum total payload of 256 KB.
  • Each 64 KB chunk of payload is billed as 1 request. So a single API call with a 256 KB payload will be billed as 4 requests.
  • To configure the maximum message size, use the console or the SetQueueAttributes method to set the MaximumMessageSize attribute. This attribute specifies the limit on bytes that an Amazon SQS message can contain. Set this limit to a value between 1,024 bytes (1 KB), and 262,144 bytes (256 KB).
  • For more details refer - https://aws.amazon.com/sqs/faqs/

SNS

  • By default, SNS offers 10 million subscriptions per topic and 100,000 topics per account.  To request a higher limit, please contact Support.
  • Topic names are limited to 256 characters.
  • SNS subscription confirmation time period is 3 days

SWF



Again as mentioned before this is obviously not an exhaustive list but merely a summary of what I thought could be best to revise before going to the associate exams. Let me know if you think something else needs to be added here for the benefit of everyone.


Since you have taken time to go through the limits here is a bonus question for you :)

Question: You receive a call from a potential client who explains that one of the many services they offer is a website running on a t2.micro EC2 instance where users can submit requests for customized e-cards to be sent to their friends and family. The e-card website administrator was on a cruise and was shocked when he returned to the office in mid-January to find hundreds of angry emails complaining that customers' loved ones had not received their Christmas cards. He also had several emails from CloudWatch alerting him that the SQS queue for the e-card application had grown to over 500 messages on December 25th. You investigate and find that the problem was caused by a crashed EC2 instance which serves as an application server. What do you advise your client to do first? Choose the correct answer from the options below

Options:
  1. Use an autoscaling group to create as many application servers as needed to access all of the Christmas card SQS messages.
  2. Reboot the application server immediately so that it begins processing the Christmas cards SQS messages.
  3. Redeploy the application server as a larger instance type so that it processes the Christmas card SQS messages faster.
  4. Send an apology to the customer notifying them that their cards will not be delivered.

Answer:
4. Send an apology to the customer notifying them that their cards will not be delivered.

Explanation:
Since the 500-message count was as of December 25th and the e-card website administrator returned in mid-January, the difference is more than 14 days, which is the maximum retention period for SQS messages. The messages would therefore have already been deleted from the queue, so they can no longer be processed.

To be honest, I had selected option 1 in my 1st attempt :)


Related Links



Friday, 25 May 2018

Understanding difference between Cognito User Pool vs Identity Pool

Background

In one of the previous posts, we saw how to set up a Cognito identity pool for unauthenticated or authenticated access to AWS resources like S3. A Cognito identity pool is used to federate users into AWS so that they can call AWS services. In this post, we are going to see the difference between a Cognito user pool and an identity pool.


Amazon Cognito User Pools

As per AWS documentation,

A user pool is a user directory in Amazon Cognito. With a user pool, your users can sign in to your web or mobile app through Amazon Cognito. Your users can also sign in through social identity providers like Facebook or Amazon, and through SAML identity providers. Whether your users sign-in directly or through a third party, all members of the user pool have a directory profile that you can access through an SDK.

User pools provide:


  • Sign-up and sign-in services.
  • A built-in, customizable web UI to sign in users.
  • Social sign-in with Facebook, Google, and Login with Amazon, as well as sign-in with SAML identity providers from your user pool.
  • User directory management and user profiles.
  • Security features such as multi-factor authentication (MFA), checks for compromised credentials, account takeover protection, and phone and email verification.
  • Customized workflows and user migration through AWS Lambda triggers.


Source: https://docs.aws.amazon.com/cognito/latest/developerguide/cognito-user-identity-pools.html

After successfully authenticating a user, Amazon Cognito issues JSON web tokens (JWT) that you can use to secure and authorize access to your own APIs, or exchange for AWS credentials.

Amazon Cognito Identity Pools (Federated Identities)


As per AWS documentation,

Amazon Cognito identity pools (federated identities) enable you to create unique identities for your users and federate them with identity providers. With an identity pool, you can obtain temporary, limited-privilege AWS credentials to access other AWS services.

Source: https://docs.aws.amazon.com/cognito/latest/developerguide/cognito-identity.html

Understanding difference between Cognito User Pool vs Identity Pool

Above definitions can be very confusing, so let me simplify them for you.

A Cognito user pool is nothing but your user management system backed by its own user directory. If you are building a new app or website and you want to add an authentication mechanism for your users to sign in or sign up, you should use a Cognito user pool. You can choose to have users sign in with an email address, phone number, username or preferred username plus their password. You can also use social identity providers for authentication and for signing in or up to your app or website. Everything is under the Cognito user pool umbrella, and you can use the provided SDK to do this. A Cognito user pool helps you maintain your user base details and their authentication. On successful authentication, it provides a JWT that can be used to authenticate your custom server APIs as well.



A Cognito identity pool is used when you need access to AWS services. It basically authenticates the user and, if authentication is successful, gives you temporary credentials that you can use to talk to AWS. For example, let's say you want to upload a file to S3; then you can use this. I had written a post earlier to do just that -
Now, how you authenticate users to get these temporary credentials is something you decide. You could authenticate against the Cognito user pool you have already created, or you can again use 3rd party authentication providers like -
  • Amazon
  • Google
  • Facebook etc
The authentication flow is as follows -



So to summarize: if you want to build a user directory with sign-in/sign-up functionality, a Cognito user pool is the way to go; and if you just want access to AWS services without worrying about maintaining a user database of your own, you can use a Cognito identity pool.




Related Links

Thursday, 24 May 2018

How to check an active internet connection on iPhone/iOS

Background

There are various network calls that we may have to make from a device to a server. An internet connection may not always be available. So it is better to check whether the network is reachable before making the API calls. In this post, we will see how to do this in Objective-C.


How to check an active internet connection on iPhone/iOS

  • First, add SystemConfiguration framework to your project.
  • To do this double click on your project. This should open your project settings. Select your app target.



  • Now go to "Build Phases" Tab on the top
  • Now Expand "Link Binary With Libraries"
  • Now click on '+' icon and search for "SystemConfiguration"
  • You should see "SystemConfiguration.framework"
  • Select it and click Add
  • You should see it would be added under "Link Binary With Libraries"


  • You should also start seeing "Framework" group under project navigator -

  • Now add Tony Million's version of Reachability.h and Reachability.m to the project. You can find it here - https://github.com/tonymillion/Reachability
  • I have just created a new group called Reachability and added these file in it.


  • In the file where you want to make this change import Reachability 
    • #import "Reachability.h"
  • Now you can check if the internet is available with following code snippet -


    if ([[Reachability reachabilityForInternetConnection]currentReachabilityStatus]==NotReachable)
    {
        //connection unavailable
        NSLog(@"Connection is not available");
    }
    else
    {
        //connection available
         NSLog(@"Connection is available");
    }



NOTE: #import <SystemConfiguration/SystemConfiguration.h> is inside Reachability.h. You do not have to explicitly import it anywhere. Just add the framework to the project as shown above.




Related Links


Tuesday, 22 May 2018

How to upload files to S3 from iOS app written in Objective C using AWS Cognito identity pool

Background

This post assumes you have set up a Cognito identity pool as explained in the previous post -
If not, please refer to the previous post and set that up. Before starting, you should have -
  1. Identity pool ID
  2. AWS region where pool and S3 bucket reside
  3. S3 bucket name
We will use the above in the configuration and implementation that follows.

How to upload files to S3 from iOS app written in Objective C using AWS Cognito identity pool

  • First, go to your Podfile and add the following dependencies -

# Uncomment the next line to define a global platform for your project
# platform :ios, '9.0'


target 'TestApp' do
  # Uncomment the next line if you're using Swift or would like to use dynamic frameworks
  # use_frameworks!
  # Pods for TestApp


pod 'AWSMobileClient', '~> 2.6.18'  # For AWSMobileClient
pod 'AWSS3', '~> 2.6.18'            # For file transfers
pod 'AWSCognito', '~> 2.6.18'       # For data sync

end

  • Next run "Run pod install --repo-update" from the command line
  • Once you have the dependencies installed we can now write Objective C code to upload a file to S3.


//
//  FileUploader.m
//  TestApp
//
//  Created by Aniket on 22/05/18.
//


#import <Foundation/Foundation.h>


#import "FileUploader.h"
#import <AWSS3/AWSS3.h>
#import <AWSCore/AWSCore.h>




@implementation FileUploader


static AWSS3TransferManager *transferManager;


+ (void) initialize {
    
    if (self == [FileUploader class]) {
        // NOTE: s3TransferManagerKey is assumed to be a string constant defined elsewhere (e.g. in FileUploader.h)
        AWSCognitoCredentialsProvider *credentialsProvider = [[AWSCognitoCredentialsProvider alloc] initWithRegionType:AWSRegionUSEast1 identityPoolId:@"us-east-1:f847843f-0162-43c2-b73f-efdc7c69cce2"];
        AWSServiceConfiguration *configuration = [[AWSServiceConfiguration alloc] initWithRegion:AWSRegionUSEast1 credentialsProvider:credentialsProvider];
        [AWSS3TransferManager registerS3TransferManagerWithConfiguration:configuration forKey:s3TransferManagerKey];

        AWSServiceManager.defaultServiceManager.defaultServiceConfiguration = configuration;

        transferManager = [AWSS3TransferManager S3TransferManagerForKey:s3TransferManagerKey];
    }
    
}


+ (void)uploadFile
{
    
    
    NSURL *uploadingFileURL = [NSURL fileURLWithPath: @"PATH_TO_FILE";
    AWSS3TransferManagerUploadRequest *uploadRequest = [AWSS3TransferManagerUploadRequest new];
    
    
    uploadRequest.bucket = s3Bucket; // s3Bucket is assumed to be your bucket name constant defined elsewhere
    int timestamp = [[NSDate date] timeIntervalSince1970];
    uploadRequest.key = [NSString stringWithFormat:@"%@-%d%@", @"testfile", timestamp, @".txt"];
    uploadRequest.body = uploadingFileURL;
    
    [[transferManager upload:uploadRequest] continueWithExecutor:[AWSExecutor mainThreadExecutor]
                                                       withBlock:^id(AWSTask *task) {
                                                           if (task.error) {
                                                               if ([task.error.domain isEqualToString:AWSS3TransferManagerErrorDomain]) {
                                                                   switch (task.error.code) {
                                                                       case AWSS3TransferManagerErrorCancelled:
                                                                       case AWSS3TransferManagerErrorPaused:
                                                                           break;
                                                                           
                                                                       default:
                                                                           NSLog(@"Error uploading file to S3: %@", task.error);
                                                                           break;
                                                                   }
                                                               } else {
                                                                   // Unknown error.
                                                                   NSLog(@"Error uploading file to S3: %@", task.error);
                                                               }
                                                           }
                                                           
                                                           if (task.result) {
                                                               AWSS3TransferManagerUploadOutput *uploadOutput = task.result;
                                                               // The file uploaded successfully.
                                                               NSLog(@"uploading file to S3 was successful: %@", uploadOutput);
                                                           }
                                                           return nil;
                                                       }];
    
}
@end


In the above code, replace the identity pool ID and region as per your configuration settings. This is Objective-C code; if you want to see Swift or Android (Java), please refer to https://docs.aws.amazon.com/aws-mobile/latest/developerguide/how-to-integrate-an-existing-bucket.html


Related Links
